From: Stefano Zacchiroli Date: Wed, 25 Dec 2002 20:48:30 +0000 (+0000) Subject: added http_getter OCaml implementation X-Git-Tag: v0_3_99~123 X-Git-Url: http://matita.cs.unibo.it/gitweb/?p=helm.git;a=commitdiff_plain;h=86e80bc65186bf4c2824dc94f5f4dd5966843f14 added http_getter OCaml implementation --- diff --git a/helm/http_getter/.cvsignore b/helm/http_getter/.cvsignore index a29a8a157..060f87336 100644 --- a/helm/http_getter/.cvsignore +++ b/helm/http_getter/.cvsignore @@ -1 +1,9 @@ Makefile configure config.log config.cache config.status http_getter.pl helm_http_getter.spec +*.cmi +*.cmo +*.cma +*.cmx +*.o +*.a +http_getter +http_getter.opt diff --git a/helm/http_getter/.depend b/helm/http_getter/.depend new file mode 100644 index 000000000..72e02203f --- /dev/null +++ b/helm/http_getter/.depend @@ -0,0 +1,29 @@ +http_getter.cmo: http_getter_cache.cmi http_getter_common.cmi \ + http_getter_const.cmi http_getter_env.cmi http_getter_map.cmi \ + http_getter_misc.cmi http_getter_types.cmo +http_getter.cmx: http_getter_cache.cmx http_getter_common.cmx \ + http_getter_const.cmx http_getter_env.cmx http_getter_map.cmx \ + http_getter_misc.cmx http_getter_types.cmx +http_getter_cache.cmo: http_getter_common.cmi http_getter_env.cmi \ + http_getter_types.cmo http_getter_cache.cmi +http_getter_cache.cmx: http_getter_common.cmx http_getter_env.cmx \ + http_getter_types.cmx http_getter_cache.cmi +http_getter_common.cmo: http_getter_env.cmi http_getter_misc.cmi \ + http_getter_types.cmo http_getter_common.cmi +http_getter_common.cmx: http_getter_env.cmx http_getter_misc.cmx \ + http_getter_types.cmx http_getter_common.cmi +http_getter_const.cmo: http_getter_const.cmi +http_getter_const.cmx: http_getter_const.cmi +http_getter_env.cmo: http_getter_misc.cmi http_getter_types.cmo \ + http_getter_env.cmi +http_getter_env.cmx: http_getter_misc.cmx http_getter_types.cmx \ + http_getter_env.cmi +http_getter_map.cmo: threadSafe.cmi http_getter_map.cmi +http_getter_map.cmx: threadSafe.cmx http_getter_map.cmi +http_getter_misc.cmo: http_getter_misc.cmi +http_getter_misc.cmx: http_getter_misc.cmi +threadSafe.cmo: threadSafe.cmi +threadSafe.cmx: threadSafe.cmi +http_getter_cache.cmi: http_getter_types.cmo +http_getter_common.cmi: http_getter_types.cmo +http_getter_env.cmi: http_getter_types.cmo diff --git a/helm/http_getter/Makefile.in b/helm/http_getter/Makefile.in deleted file mode 100644 index f4e6b592d..000000000 --- a/helm/http_getter/Makefile.in +++ /dev/null @@ -1,41 +0,0 @@ -INSTALL_DIR=@RESOLVED_EXEC_PREFIX@/bin -CGI_DIR=@HELM_CGI_DIR@ - -all: - -install: installgetter installcgi - -uninstall: uninstallcgi uninstallgetter - -installgetter: - cp http_getter.pl $(INSTALL_DIR)/ - -installcgi: - for f in *.cgi; do \ - if [ -f $$f ]; then \ - cp -f $$f $(CGI_DIR)/; \ - fi; \ - done - -uninstallgetter: - rm $(INSTALL_DIR)/http_getter.pl - -uninstallcgi: - for f in *.cgi; do \ - rm -f $(CGI_DIR)/$$f; \ - done - -clean: - -distclean: clean - rm -f Makefile configure config.log config.cache config.status \ - http_getter.pl *.spec - -dist: clean - rm -rf ../@PACKAGE@-@VERSION@ - mkdir ../@PACKAGE@-@VERSION@ - cp -r * ../@PACKAGE@-@VERSION@ - (cd .. ; tar cvfz @PACKAGE@-@VERSION@.tar.gz @PACKAGE@-@VERSION@ ; rm -rf @PACKAGE@-@VERSION@) - -.PHONY: all install distclean - diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml new file mode 100644 index 000000000..bb9c78e7e --- /dev/null +++ b/helm/http_getter/http_getter.ml @@ -0,0 +1,356 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +(* TODO optimization: precompile almost all regexp *) + +open Http_getter_common;; +open Http_getter_types;; +open Printf;; + + (* debugging settings *) +let debug = true;; +let debug_print s = if debug then prerr_endline ("[HTTP-Getter] " ^ s);; +let http_debug = false;; +Http_common.debug := http_debug;; + +let http_get url = + debug_print ("Downloading URL: " ^ url); + try + Some (Http_client.Convenience.http_get url) + with Http_client.Http_error (code, _) -> + (debug_print + (sprintf "Failed to download %s, HTTP response was %d" url code); + None) +;; + +let parse_format (req: Http_types.request) = + try + (match req#param "format" with + | "normal" -> Enc_normal + | "gz" -> Enc_gzipped + | s -> raise (Http_getter_bad_request ("Invalid format: " ^ s))) + with Http_types.Param_not_found _ -> Enc_normal +;; +let parse_patch_dtd (req: Http_types.request) = + try + (match req#param "patch_dtd" with + | s when String.lowercase s = "yes" -> true + | s when String.lowercase s = "no" -> false + | s -> raise (Http_getter_bad_request ("Invalid patch_dtd value: " ^ s))) + with Http_types.Param_not_found _ -> true +;; +let parse_ls_format (req: Http_types.request) = + match req#param "format" with + | s when String.lowercase s = "txt" -> Fmt_text + | s when String.lowercase s = "xml" -> Fmt_xml + | s -> raise (Http_getter_bad_request ("Invalid /ls format: " ^ s)) +;; +let parse_ls_uri (req: Http_types.request) = + let baseuri = req#param "baseuri" in + let subs = + Pcre.extract ~pat:"^(\\w+):(.*)$" (Pcre.replace ~pat:"/+$" baseuri) + in + match (subs.(1), subs.(2)) with + | "cic", uri -> Cic uri + | "theory", uri -> Theory uri + | _ -> raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ baseuri)) +;; + +let xml_map = new Http_getter_map.map Http_getter_env.xml_dbm in +let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm in +let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm in + +let resolve uri = (* use global maps *) + let map = + match uri with + | uri when is_xml_uri uri -> xml_map + | uri when is_rdf_uri uri -> rdf_map + | uri when is_xsl_uri uri -> xsl_map + | uri -> raise (Http_getter_unresolvable_URI uri) + in + map#resolve uri +in +let return_all_foo_uris map doctype filter outchan = + Http_daemon.send_basic_headers outchan; + Http_daemon.send_header "Content-Type" "text/xml" outchan; + Http_daemon.send_CRLF outchan; + output_string + outchan + (sprintf +" +; + +<%s> +" + doctype + Http_getter_env.my_own_url + doctype + doctype); + map#iter + (fun uri _ -> + if filter uri then + output_string outchan (sprintf "\t\n" uri)); + output_string outchan (sprintf "\n" doctype) +in +let return_all_uris = return_all_foo_uris xml_map "alluris" in +let return_all_rdf_uris = return_all_foo_uris rdf_map "allrdfuris" in +let return_ls = + let (++) (oldann, oldtypes, oldbody) (newann, newtypes, newbody) = + ((if newann > oldann then newann else oldann), + (if newtypes > oldtypes then newtypes else oldtypes), + (if newbody > oldbody then newbody else oldbody)) + in + let basepartRE = + Pcre.regexp "^([^.]*\\.[^.]*)((\\.body)|(\\.types))?(\\.ann)?" + in + fun lsuri format outchan -> + let pat = + "^" ^ (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) + in + let dirs = ref [] in + let objs = Hashtbl.create 17 in + xml_map#iter (* BLEARGH Dbm module lacks support for fold-like functions *) + (fun _ -> function + | uri when Pcre.pmatch ~pat:(pat ^ "/") uri -> (* directory hit *) + let dir = + List.hd (Pcre.split ~pat:"/" (Pcre.replace ~pat:(pat ^ "/") uri)) + in + dirs := dir :: !dirs + | uri when Pcre.pmatch ~pat:(pat ^ "(\\.|$)") uri -> (* object hit *) + let localpart = Pcre.replace ~pat:"^.*/" uri in + let basepart = Pcre.replace ~rex:basepartRE ~templ:"$1" localpart in + let oldflags = + try + Hashtbl.find objs basepart + with Not_found -> (false, No, No) (* no ann, no types no body *) + in + let newflags = + match localpart with + | s when Pcre.pmatch ~pat:"\\.types" s -> (false, Yes, No) + | s when Pcre.pmatch ~pat:"\\.types.ann" s -> (true, Ann, No) + | s when Pcre.pmatch ~pat:"\\.body" s -> (false, No, Yes) + | s when Pcre.pmatch ~pat:"\\.body.ann" s -> (true, No, Ann) + | s -> + raise + (Http_getter_internal_error ("Invalid /ls localpart: " ^ s)) + in + Hashtbl.replace objs basepart (oldflags ++ newflags) + | _ -> () (* miss *)); + match format with + | Fmt_text -> + let body = + "dir, " ^ (String.concat "\ndir, " (List.sort compare !dirs)) ^ "\n" ^ + (Http_getter_misc.hashtbl_sorted_fold + (fun uri (annflag, typesflag, bodyflag) cont -> + sprintf + "%sobject, %s, <%s,%s,%s>\n" + cont uri (if annflag then "YES" else "NO") + (string_of_ls_flag typesflag) (string_of_ls_flag bodyflag)) + objs "") + in Http_daemon.respond ~headers:["Content-Type", "text/txt"] ~body outchan + | Fmt_xml -> + let body = + sprintf +" + +%s + +" + Http_getter_env.my_own_url + ("\n" ^ + (String.concat + "\n" + (List.map + (fun d -> "
" ^ d ^ "
") + (List.sort compare !dirs))) ^ "\n" ^ + (Http_getter_misc.hashtbl_sorted_fold + (fun uri (annflag, typesflag, bodyflag) cont -> + sprintf +"%s +\t +\t +\t + +" + cont uri (if annflag then "YES" else "NO") + (string_of_ls_flag typesflag) + (string_of_ls_flag bodyflag)) + objs "")) + in Http_daemon.respond ~headers:["Content-Type", "text/xml"] ~body outchan +in +let update_from_server logmsg server_url = (* use global maps *) + let xml_url_of_uri = function + (* TODO missing sanity checks on server_url, e.g. it can contains $1 *) + | uri when (Pcre.pmatch ~pat:"^cic://" uri) -> + Pcre.replace ~pat:"^cic://" ~templ:server_url uri + | uri when (Pcre.pmatch ~pat:"^theory://" uri) -> + Pcre.replace ~pat:"^theory://" ~templ:server_url uri + | uri -> raise (Http_getter_invalid_URI uri) + in + let rdf_url_of_uri = function (* TODO as above *) + | uri when (Pcre.pmatch ~pat:"^helm:rdf.*//cic:" uri) -> + Pcre.replace ~pat:"^helm:rdf.*//cic:" ~templ:server_url uri + | uri when (Pcre.pmatch ~pat:"^helm:rdf.*//theory:" uri) -> + Pcre.replace ~pat:"^helm:rdf.*//theory:" ~templ:server_url uri + | uri -> raise (Http_getter_invalid_URI uri) + in + let log = ref ("Processing server: " ^ server_url ^ "
\n") in + let (xml_index, rdf_index, xsl_index) = + (* TODO keeps index in memory, is better to keep them on temp files? *) + (http_get (server_url ^ "/" ^ Http_getter_env.xml_index), + http_get (server_url ^ "/" ^ Http_getter_env.rdf_index), + http_get (server_url ^ "/" ^ Http_getter_env.xsl_index)) + in + if (xml_index = None && rdf_index = None && xsl_index = None) then + debug_print (sprintf "Warning: useless server %s" server_url); + (match xml_index with + | Some xml_index -> + (log := !log ^ "Updating XML db ...
\n"; + List.iter + (fun l -> + try + (match Pcre.split ~pat:"[ \\t]+" l with + | [uri; "gz"] -> xml_map#add uri ((xml_url_of_uri uri) ^ ".xml.gz") + | [uri] -> xml_map#add uri ((xml_url_of_uri uri) ^ ".xml") + | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") + with Http_getter_invalid_URI uri -> + log := !log ^ "Ignoring invalid XML URI: " ^ uri) + (Pcre.split ~pat:"\n+" xml_index)) (* xml_index lines *) + | None -> ()); + (match rdf_index with + | Some rdf_index -> + (log := !log ^ "Updating RDF db ...
\n"; + List.iter + (fun l -> + try + (match Pcre.split ~pat:"[ \\t]+" l with + | [uri; "gz"] -> rdf_map#add uri ((rdf_url_of_uri uri) ^ ".xml.gz") + | [uri] -> rdf_map#add uri ((rdf_url_of_uri uri) ^ ".xml") + | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") + with Http_getter_invalid_URI uri -> + log := !log ^ "Ignoring invalid RDF URI: " ^ uri) + (Pcre.split ~pat:"\n+" rdf_index)) (* rdf_index lines *) + | None -> ()); + (match xsl_index with + | Some xsl_index -> + (log := !log ^ "Updating XSLT db ...
\n"; + List.iter + (fun l -> xsl_map#add l (server_url ^ "/" ^ l)) + (Pcre.split ~pat:"\n+" xsl_index); + log := !log ^ "All done!
\n") + | None -> ()); + !log +in + + (* thread action *) +let callback (req: Http_types.request) outchan = + try + debug_print ("Connection from " ^ req#clientAddr); + debug_print ("Received request: " ^ req#path); + (match req#path with + | "/help" -> return_html_msg Http_getter_const.usage_string outchan + | "/getxml" | "/getxslt" | "/getdtd" | "/resolve" | "/register" -> + (let uri = req#param "uri" in (* common parameter *) + match req#path with + | "/getxml" -> + let enc = parse_format req in + let patch_dtd = parse_patch_dtd req in + Http_getter_cache.respond_xml + ~url:(resolve uri) ~uri ~enc ~patch_dtd outchan + | "/getxslt" -> +(* let patch_dtd = parse_patch_dtd req in *) + (* TODO add support and default value for patch_dtd *) + Http_getter_cache.respond_xsl ~url:(resolve uri) outchan + | "/getdtd" -> + let patch_dtd = parse_patch_dtd req in + Http_getter_cache.respond_dtd + ~patch_dtd ~url:(Http_getter_env.dtd_dir ^ "/" ^ uri) outchan + | "/resolve" -> + (try + return_xml_msg + (sprintf "\n" (resolve uri)) + outchan + with Http_getter_unresolvable_URI uri -> + return_xml_msg "\n" outchan) + | "/register" -> + let url = req#param "url" in + xml_map#add uri url; + return_html_msg "Register done" outchan + | _ -> assert false) + | "/update" -> + (xml_map#clear; rdf_map#clear; xsl_map#clear; + let log = + List.fold_left + update_from_server + "" (* initial logmsg: empty *) + (* reverse order: 1st server is the most important one *) + (List.rev Http_getter_env.servers) + in + return_html_msg log outchan) + | "/getalluris" -> + return_all_uris + (fun uri -> + (Pcre.pmatch ~pat:"^cic:" uri) && + not (Pcre.pmatch ~pat:"\\.types$" uri)) + outchan + | "/getallrdfuris" -> + (let classs = req#param "class" in + try + let filter = + let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in + match classs with + | ("forward" as c) | ("backward" as c) -> + (fun uri -> Pcre.pmatch ~pat:(base ^ c) uri) + | c -> raise (Http_getter_invalid_RDF_class c) + in + return_all_rdf_uris filter outchan + with Http_getter_invalid_RDF_class c -> + raise (Http_getter_bad_request ("Invalid RDF class: " ^ c))) + | "/ls" -> return_ls (parse_ls_uri req) (parse_ls_format req) outchan + | "/getempty" -> + Http_daemon.respond ~body:Http_getter_const.empty_xml outchan + | invalid_request -> + Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan) + with + | Http_types.Param_not_found attr_name -> + return_400 (sprintf "Parameter '%s' is missing" attr_name) outchan + | Http_getter_bad_request msg -> return_html_error msg outchan + | Http_getter_internal_error msg -> return_html_internal_error msg outchan + | exc -> + return_html_error + ("Uncaught exception: " ^ (Printexc.to_string exc)) + outchan +in + + (* daemon initialization *) +Http_getter_env.dump_env (); +flush stdout; +Unix.putenv "http_proxy" ""; +Http_daemon.start' + ~timeout:None ~port:Http_getter_env.port ~mode:`Thread callback + diff --git a/helm/http_getter/http_getter_cache.ml b/helm/http_getter/http_getter_cache.ml new file mode 100644 index 000000000..6d2fe6e06 --- /dev/null +++ b/helm/http_getter/http_getter_cache.ml @@ -0,0 +1,118 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +open Http_getter_common;; +open Http_getter_types;; +open Printf;; + +let resource_type_of_url = function + | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped + | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal + | url -> raise (Http_getter_invalid_URL url) +let extension_of_resource_type = function + | Enc_normal -> "xml" + | Enc_gzipped -> "xml.gz" + + (* basename = resource name without trailing ".gz", if any *) +let is_in_cache basename = + Sys.file_exists + (match Http_getter_env.cache_mode with + | Enc_normal -> basename + | Enc_gzipped -> basename ^ ".gz") + +let respond_xml ?(enc = Enc_normal) ?(patch_dtd = true) ~url ~uri outchan = + let resource_type = resource_type_of_url url in + let extension = extension_of_resource_type resource_type in + let downloadname = + match http_getter_uri_of_string uri with + | Xml_uri (Cic baseuri) | Xml_uri (Theory baseuri) -> + (* assumption: baseuri starts with "/" *) + sprintf "%s%s.%s" Http_getter_env.xml_dir baseuri extension + | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri)) + | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) -> + let escaped_prefix = + (Pcre.replace ~pat:"/" ~templ:"_" + (Pcre.replace ~pat:"_" ~templ:"__" + (prefix ^ + (match qbaseuri with + | Cic _ -> "//cic:" + | Theory _ -> "//theory:")))) + in + sprintf "%s/%s%s.%s" + Http_getter_env.rdf_dir escaped_prefix baseuri extension + in + let patch_fun = + if patch_dtd then Http_getter_common.patch_xml else (fun x -> x) + in + let basename = Pcre.replace ~pat:"\\.gz$" downloadname in + if not (is_in_cache basename) then begin (* download and fill cache *) + wget ~output:downloadname url; + match (resource_type, Http_getter_env.cache_mode) with + | Enc_normal, Enc_normal -> + (if enc = Enc_gzipped then gzip ~keep:true downloadname) + | Enc_gzipped, Enc_gzipped -> + (if enc = Enc_normal then gunzip ~keep:true downloadname) + | Enc_normal, Enc_gzipped -> gzip ~keep:(enc = Enc_normal) downloadname + | Enc_gzipped, Enc_normal -> gunzip ~keep:(enc = Enc_gzipped) downloadname + end else begin (* resource already in cache *) + match (enc, Http_getter_env.cache_mode) with + | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> () + | Enc_normal, Enc_gzipped -> gunzip ~keep:true (basename ^ ".gz") + | Enc_gzipped, Enc_normal -> gzip ~keep:true basename + end; (* now resource is in cache *) + (* invariant: file to be sent back to client is available on disk in the + format the client likes *) + (match enc with (* send file to client *) + | Enc_normal -> + return_file ~fname:basename ~contype:"text/xml" ~patch_fun outchan + | Enc_gzipped -> + return_file + ~fname:(basename ^ ".gz") ~contype:"text/xml" ~contenc:"x-gzip" + ~patch_fun outchan); + match (enc, Http_getter_env.cache_mode) with (* remove temp files *) + | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> () + | Enc_normal, Enc_gzipped -> Sys.remove basename + | Enc_gzipped, Enc_normal -> Sys.remove (basename ^ ".gz") + + (* TODO enc is not yet supported *) +let respond_xsl ?(enc = Enc_normal) ?(patch_dtd = true) ~url outchan = + let patch_fun = + if patch_dtd then Http_getter_common.patch_xsl else (fun x -> x) + in + let fname = tempfile () in + wget ~output:fname url; + return_file ~fname ~contype:"text/xml" ~patch_fun outchan; + Sys.remove fname + + (* TODO patch_dtd and format are fooish, implement them! *) +let respond_dtd ?(enc = Enc_normal) ?(patch_dtd = true) ~url outchan = + if Sys.file_exists url then + return_file + ~fname:url ~contype:"text/xml" ~patch_fun:Http_getter_common.patch_dtd + outchan + else + return_html_error ("Can't find DTD: " ^ url) outchan + diff --git a/helm/http_getter/http_getter_cache.mli b/helm/http_getter/http_getter_cache.mli new file mode 100644 index 000000000..d4deb6795 --- /dev/null +++ b/helm/http_getter/http_getter_cache.mli @@ -0,0 +1,43 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +open Http_getter_types;; + +val respond_xml: + ?enc:http_getter_encoding -> ?patch_dtd:bool -> url:string -> uri:string -> + out_channel -> + unit + +val respond_xsl: + ?enc:http_getter_encoding -> ?patch_dtd:bool -> url:string -> + out_channel -> + unit + +val respond_dtd: + ?enc:http_getter_encoding -> ?patch_dtd:bool -> url:string -> + out_channel -> + unit + diff --git a/helm/http_getter/http_getter_common.ml b/helm/http_getter/http_getter_common.ml new file mode 100644 index 000000000..67d72f4f1 --- /dev/null +++ b/helm/http_getter/http_getter_common.ml @@ -0,0 +1,158 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +open Http_getter_types;; +open Printf;; + +let string_of_ls_flag = function No -> "NO" | Yes -> "YES" | Ann -> "ANN" +let string_of_encoding = function + | Enc_normal -> "Normal" + | Enc_gzipped -> "GZipped" + +let is_cic_uri uri = Pcre.pmatch ~pat:"^cic:" uri +let is_theory_uri uri = Pcre.pmatch ~pat:"^theory:" uri +let is_xml_uri uri = is_cic_uri uri || is_theory_uri uri +let is_rdf_uri uri = Pcre.pmatch ~pat:"^helm:rdf(.*):(.*)//(.*)" uri +let is_xsl_uri uri = Pcre.pmatch ~pat:"^\\w+\\.xsl" uri + +let rec http_getter_uri_of_string = function + | uri when is_rdf_uri uri -> + (match Pcre.split ~pat:"//" uri with + | [ prefix; uri ] -> + let rest = + match http_getter_uri_of_string uri with + | Xml_uri xmluri -> xmluri + | _ -> raise (Http_getter_invalid_URI uri) + in + Rdf_uri (prefix, rest) + | _ -> raise (Http_getter_invalid_URI uri)) + | uri when is_cic_uri uri -> Xml_uri (Cic (Pcre.replace ~pat:"^cic:" uri)) + | uri when is_theory_uri uri -> + Xml_uri (Theory (Pcre.replace ~pat:"^theory:" uri)) + | uri -> raise (Http_getter_invalid_URI uri) + +let patch_xml line = + Pcre.replace + ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" Http_getter_env.dtd_base_url) + ~templ:( + sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri=" Http_getter_env.my_own_url) + line +let patch_xsl = + let mk_patch_fun tag line = + Pcre.replace + ~pat:(sprintf "%s\\s+href=\"" tag) + ~templ:( + sprintf "%s href=\"%s/getxslt?uri=" Http_getter_env.my_own_url tag) + line + in + let (patch_import, patch_include) = + (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include") + in + fun line -> patch_include (patch_import line) +let patch_dtd line = + Pcre.replace + ~pat:"ENTITY (.*) SYSTEM\\s+\"" + ~templ:( + sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri=" Http_getter_env.my_own_url) + line + +let pp_error = + sprintf "

Http Getter error: %s

" +let pp_internal_error = + sprintf "

Http Getter Internal error: %s

" +let pp_msg = sprintf "

%s

" + +let mk_return_fun pp_fun contype msg outchan = + Http_daemon.respond + ~body:(pp_fun msg) + ~headers:["Content-Type", contype] + outchan + +let return_html_error = mk_return_fun pp_error "text/html" +let return_html_internal_error = mk_return_fun pp_internal_error "text/html" +let return_html_msg = mk_return_fun pp_msg "text/html" +let return_xml_msg = mk_return_fun pp_msg "text/xml" + (** + @param fname name of the file to be sent + @param contype Content-Type header value + @param contenc Content-Enconding header value + @param patch_fun function used to patch file contents + @param outchan output channel over which sent file fname *) +let return_file ~fname ?contype ?contenc ?(patch_fun = fun x -> x) outchan = + let headers = + match (contype, contenc) with + | (Some t, Some e) -> [ "Content-Type", t; "Content-Enconding", e ] + | (Some t, None) -> [ "Content-Type" , t ] + | (None, Some e) -> [ "Content-Enconding", e ] + | (None, None) -> [] + in + Http_daemon.send_basic_headers outchan; + Http_daemon.send_headers headers outchan; + Http_daemon.send_CRLF outchan; + Http_getter_misc.iter_file + (fun line -> output_string outchan (patch_fun line ^ "\n")) + fname + (* return a bad request http response *) +let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan + +let wget ?output url = + let flags = + (match output with Some file -> ["-O " ^ file] | None -> []) @ [url] + in + Shell.call + ~stdout:Shell.to_dev_null ~stderr:Shell.to_dev_null [Shell.cmd "wget" flags] + + (* TODO gzip and gunzip create executables file, but umask seems to be + correctly inherited from the shell .... boh *) + + (* stderr shown as usual *) +let gzip ?(keep = false) fname = + if keep then (* keep original file *) + Shell.call + ~stdout:(Shell.to_file (fname ^ ".gz")) + [Shell.cmd "gzip" ["-f"; "-c"; fname]] + else (* don't keep original file *) + Shell.call [Shell.cmd "gzip" ["-f"; fname]] + + (* stderr shown as usual *) +let gunzip ?(keep = false) fname = + if not (Pcre.pmatch ~pat:"\\.gz$" fname) then + failwith "gunzip: source file doesn't end with '.gz'"; + let basename = Pcre.replace ~pat:"\\.gz$" fname in + if keep then (* keep original file *) + Shell.call + ~stdout:(Shell.to_file basename) + [Shell.cmd "gunzip" ["-f"; "-c"; fname]] + else (* don't keep original file *) + Shell.call [Shell.cmd "gunzip" ["-f"; fname]] + +let tempfile () = + let buf = Buffer.create 28 in (* strlen("/tmp/fileSzb3Mw_http_getter") *) + Shell.call + ~stdout:(Shell.to_buffer buf) + [Shell.cmd "tempfile" ["--suffix=_http_getter"]]; + Pcre.replace ~pat:"\n" (Buffer.contents buf) + diff --git a/helm/http_getter/http_getter_common.mli b/helm/http_getter/http_getter_common.mli new file mode 100644 index 000000000..323c71d21 --- /dev/null +++ b/helm/http_getter/http_getter_common.mli @@ -0,0 +1,57 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +open Http_getter_types;; + +val string_of_ls_flag: http_getter_ls_flag -> string +val string_of_encoding: http_getter_encoding -> string + +val is_xml_uri: string -> bool +val is_rdf_uri: string -> bool +val is_xsl_uri: string -> bool + +val http_getter_uri_of_string: string -> http_getter_uri + +val patch_xml : string -> string +val patch_xsl : string -> string +val patch_dtd : string -> string + +val return_html_error: string -> out_channel -> unit +val return_html_internal_error: string -> out_channel -> unit +val return_html_msg: string -> out_channel -> unit +val return_xml_msg: string -> out_channel -> unit +val return_400: string -> out_channel -> unit +val return_file: + fname:string -> + ?contype:string -> ?contenc:string -> ?patch_fun:(string -> string) -> + out_channel -> + unit + +val wget: ?output: string -> string -> unit +val gzip: ?keep: bool -> string -> unit +val gunzip: ?keep: bool -> string -> unit +val tempfile: unit -> string + diff --git a/helm/http_getter/http_getter_const.ml b/helm/http_getter/http_getter_const.ml new file mode 100644 index 000000000..93c2d5d6a --- /dev/null +++ b/helm/http_getter/http_getter_const.ml @@ -0,0 +1,86 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + + (* TODO *) +let usage_string = +" + + + HTTP Getter's help message + + +

+ Usage: http://hostname:getterport/command +

+

+ Available commands: +

+

+ help
+ display this help message +

+

+ getxml?uri=URI[&format=(normal|gz)][&patch_dtd=(yes|no)]
+

+

+ register?uri=URI&url=URL
+

+

+ resolve?uri=URI
+

+

+ getdtd?uri=URI[&patch_dtd=(yes|no)]
+

+

+ getxslt?uri=URI[&patch_dtd=(yes|no)]
+

+

+ update
+

+

+ getalluris
+

+

+ getallrdfuris
+

+

+ ls?baseuri=URI&format=(txt|xml)
+

+

+ getempty
+

+ + +" + +let empty_xml = +" + +]> + +" + diff --git a/helm/http_getter/http_getter_const.mli b/helm/http_getter/http_getter_const.mli new file mode 100644 index 000000000..7f7500269 --- /dev/null +++ b/helm/http_getter/http_getter_const.mli @@ -0,0 +1,28 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +val usage_string: string +val empty_xml: string diff --git a/helm/http_getter/http_getter_env.ml b/helm/http_getter/http_getter_env.ml new file mode 100644 index 000000000..377d305f4 --- /dev/null +++ b/helm/http_getter/http_getter_env.ml @@ -0,0 +1,143 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +open Http_getter_types;; +open Printf;; +open Pxp_document;; +open Pxp_types;; +open Pxp_yacc;; + +type setting_src = + | Environment (* read setting from environment variables *) + | Conffile (* read setting from configuration file *) + | Both (* read setting from both; environment override config file *) + +let conf_file_tree = ref None + +let (conf_file, conf_dir) = + try + let conf_dir = + Pcre.replace ~pat:"/$" (Sys.getenv "HELM_CONFIGURATION_DIR") + in + (conf_dir ^ "/configuration.xml", conf_dir) + with Not_found -> failwith "HELM_CONFIGURATION_DIR undefined" + +let safe_getenv ?(from = Both) var = + (let rec read_from_file () = + (match !conf_file_tree with + | None -> + conf_file_tree := + Some + (parse_wfcontent_entity + default_config (from_file conf_file) default_spec); + read_from_file () + | Some t -> + (try + Some (find_element (String.lowercase var) t)#data + with Not_found -> None)) + in + let read_from_env () = try Some (Sys.getenv var) with Not_found -> None in + let return_value name = function + | Some v -> v + | None -> failwith ("Setting " ^ name ^ " is not defined") + in + (match from with + | Environment -> return_value var (read_from_env ()) + | Conffile -> return_value var (read_from_file ()) + | Both -> + (match read_from_env () with + | None -> return_value var (read_from_file ()) + | v -> return_value var v))) + +let servers_file = safe_getenv "HTTP_GETTER_SERVERS_FILE" +let servers = + let cons hd tl = hd @ [ tl ] in + Http_getter_misc.fold_file cons [] servers_file + +let xml_dbm = safe_getenv "HTTP_GETTER_XML_DBM" +let rdf_dbm = safe_getenv "HTTP_GETTER_RDF_DBM" +let xsl_dbm = safe_getenv "HTTP_GETTER_XSL_DBM" +let xml_index = safe_getenv "HTTP_GETTER_XML_INDEXNAME" +let rdf_index = safe_getenv "HTTP_GETTER_RDF_INDEXNAME" +let xsl_index = safe_getenv "HTTP_GETTER_XSL_INDEXNAME" +let xml_dir = safe_getenv "HTTP_GETTER_XML_DIR" +let rdf_dir = safe_getenv "HTTP_GETTER_RDF_DIR" +let xsl_dir = safe_getenv "HTTP_GETTER_XSL_DIR" +let dtd_dir = safe_getenv "HTTP_GETTER_DTD_DIR" + +let port = + let port = safe_getenv "HTTP_GETTER_PORT" in + try + int_of_string port + with Failure "int_of_string" -> + failwith ("Invalid port value: " ^ port) +let host = + let buf = Buffer.create 20 in + Shell.call ~stdout:(Shell.to_buffer buf) [Shell.cmd "hostname" ["-f"]]; + Pcre.replace ~pat:"\n+$" (Buffer.contents buf) +let my_own_url = + sprintf + "http://%s%s/" + host + (if port = 80 then "" else (sprintf ":%d" port)) +let dtd_base_url = safe_getenv "HTTP_GETTER_DTD_BASE_URL" + +let cache_mode = + match String.lowercase (safe_getenv "HTTP_GETTER_CACHE_MODE") with + | "normal" -> Enc_normal + | "gz" -> Enc_gzipped + | mode -> failwith ("Invalid cache mode: " ^ mode) + +let dump_env () = + printf +"xml_dbm:\t%s +rdf_dbm:\t%s +xsl_dbm:\t%s +xml_index:\t%s +rdf_index:\t%s +xsl_index:\t%s +xml_dir:\t%s +rdf_dir:\t%s +xsl_dir:\t%s +dtd_dir:\t%s +servers_file:\t%s +host:\t\t%s +port:\t\t%d +my_own_url:\t%s +dtd_base_url:\t%s +cache_mode:\t%s +conf_file:\t%s +conf_dir:\t%s +servers: +\t%s +" + xml_dbm rdf_dbm xsl_dbm xml_index rdf_index xsl_index + xml_dir rdf_dir xsl_dir dtd_dir servers_file host port + my_own_url dtd_base_url + (match cache_mode with Enc_normal -> "Normal" | Enc_gzipped -> "GZipped") + conf_file conf_dir (String.concat "\n\t" servers); + flush stdout + diff --git a/helm/http_getter/http_getter_env.mli b/helm/http_getter/http_getter_env.mli new file mode 100644 index 000000000..e47cc3d3c --- /dev/null +++ b/helm/http_getter/http_getter_env.mli @@ -0,0 +1,58 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + + open Http_getter_types;; + + (* environment gathered data *) + + val xml_dbm : string (* XML map DBM file *) + val rdf_dbm : string (* RDF map DBM file *) + val xsl_dbm : string (* XSL map DBM file *) + val xml_index : string (* XMLs' index *) + val rdf_index : string (* RDFs' index *) + val xsl_index : string (* XSLTs' index *) + val xml_dir : string (* XMLs' directory *) + val rdf_dir : string (* RDFs' directory *) + val xsl_dir : string (* XSLs' directory *) + val dtd_dir : string (* DTDs' root directory *) + val servers_file : string (* servers.txt file *) + val port : int (* port on which getter listens *) + val dtd_base_url : string (* base URL for DTD downloading *) + + (* derived data *) + + val host : string (* host on which getter listens *) + val my_own_url : string (* URL at which contact getter *) + val servers : string list (* servers list (i.e. servers_file lines) *) + val cache_mode : http_getter_encoding (* cached files encoding *) + val conf_file : string (* configuration file's full path *) + val conf_dir : string (* directory where conf_file resides *) + + (* misc *) + +val dump_env : unit -> unit (* dump a textual representation of the + current http_getter settings *) + diff --git a/helm/http_getter/http_getter_map.ml b/helm/http_getter/http_getter_map.ml new file mode 100644 index 000000000..9ee876181 --- /dev/null +++ b/helm/http_getter/http_getter_map.ml @@ -0,0 +1,69 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +exception Key_already_in of string;; +exception Key_not_found of string;; + +class map dbname = + let perm = 420 in (* permission 644 in decimal notation *) + let db = Dbm.opendbm dbname [ Dbm.Dbm_rdwr; Dbm.Dbm_create ] perm in + object (self) + + inherit ThreadSafe.threadSafe + + initializer Gc.finalise Dbm.close db (* close db on GC *) + + method add key value = + self#doWriter (lazy ( + try + Dbm.add db key value + with Dbm.Dbm_error "Entry already exists" -> raise (Key_already_in key) + )) + + method remove key = + self#doWriter (lazy ( + try + Dbm.remove db key + with Dbm.Dbm_error "dbm_delete" -> raise (Key_not_found key) + )) + + method resolve key = + self#doReader (lazy ( + try Dbm.find db key with Not_found -> raise (Key_not_found key) + )) + + method iter (f: string -> string -> unit) = + self#doReader (lazy ( + Dbm.iter f db + )) + + method clear = + self#doWriter (lazy ( + Dbm.iter (fun k _ -> Dbm.remove db k) db + )) + + end + diff --git a/helm/http_getter/http_getter_map.mli b/helm/http_getter/http_getter_map.mli new file mode 100644 index 000000000..d3402eea3 --- /dev/null +++ b/helm/http_getter/http_getter_map.mli @@ -0,0 +1,38 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +exception Key_already_in of string +exception Key_not_found of string + +class map: + string -> + object + method add: string -> string -> unit + method remove: string -> unit + method resolve: string -> string + method iter: (string -> string -> unit) -> unit + method clear: unit + end diff --git a/helm/http_getter/http_getter_misc.ml b/helm/http_getter/http_getter_misc.ml new file mode 100644 index 000000000..d40fbf4f6 --- /dev/null +++ b/helm/http_getter/http_getter_misc.ml @@ -0,0 +1,45 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +let fold_file f init fname = + let inchan = open_in fname in + let rec fold_lines' value = + try + let line = input_line inchan in + fold_lines' (f value line) + with End_of_file -> value + in + let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in + close_in inchan; + res +let iter_file f = fold_file (fun _ line -> f line) () + +let hashtbl_sorted_fold f tbl init = + let sorted_keys = + List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) + in + List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys + diff --git a/helm/http_getter/http_getter_misc.mli b/helm/http_getter/http_getter_misc.mli new file mode 100644 index 000000000..c1afb198f --- /dev/null +++ b/helm/http_getter/http_getter_misc.mli @@ -0,0 +1,33 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +val fold_file : ('a -> string -> 'a) -> 'a -> string -> 'a +val iter_file : (string -> unit) -> string -> unit + + (** like Hashtbl.fold but keys are processed ordered *) +val hashtbl_sorted_fold : + ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c + diff --git a/helm/http_getter/http_getter_types.ml b/helm/http_getter/http_getter_types.ml new file mode 100644 index 000000000..96977b6c5 --- /dev/null +++ b/helm/http_getter/http_getter_types.ml @@ -0,0 +1,45 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +exception Http_getter_bad_request of string +exception Http_getter_unresolvable_URI of string +exception Http_getter_invalid_URI of string +exception Http_getter_invalid_URL of string +exception Http_getter_invalid_RDF_class of string +exception Http_getter_internal_error of string + +type http_getter_encoding = Enc_normal | Enc_gzipped +type http_getter_answer_format = Fmt_text | Fmt_xml +type http_getter_ls_flag = No | Yes | Ann + +type http_getter_xml_uri = + | Cic of string + | Theory of string +type http_getter_rdf_uri = string * http_getter_xml_uri +type http_getter_uri = + | Xml_uri of http_getter_xml_uri + | Rdf_uri of http_getter_rdf_uri + diff --git a/helm/http_getter/threadSafe.ml b/helm/http_getter/threadSafe.ml new file mode 100644 index 000000000..72494ef43 --- /dev/null +++ b/helm/http_getter/threadSafe.ml @@ -0,0 +1,96 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +let debug = false +let debug_print msg = if debug then prerr_endline msg + +class threadSafe = + object (self) + + val mutex = Mutex.create () + + (** condition variable: 'no readers is currently reading' *) + val noReaders = Condition.create () + + (** readers count *) + val mutable readersCount = 0 + + method private incrReadersCount = (* internal, not exported *) + self#doCritical (lazy ( + readersCount <- readersCount + 1 + )) + + method private decrReadersCount = (* internal, not exported *) + self#doCritical (lazy ( + if readersCount > 0 then readersCount <- readersCount - 1; + )) + + method private signalNoReaders = (* internal, not exported *) + self#doCritical (lazy ( + if readersCount = 0 then Condition.signal noReaders + )) + + method private doCritical: 'a. 'a lazy_t -> 'a = + fun action -> + debug_print ""; + (try + Mutex.lock mutex; + let res = Lazy.force action in + Mutex.unlock mutex; + debug_print ""; + res + with e -> + Mutex.unlock mutex; + raise e); + + method private doReader: 'a. 'a lazy_t -> 'a = + fun action -> + debug_print ""; + let cleanup () = + self#decrReadersCount; + self#signalNoReaders + in + self#incrReadersCount; + let res = (try Lazy.force action with e -> (cleanup (); raise e)) in + cleanup (); + debug_print ""; + res + + (* TODO may starve!!!! is what we want or not? *) + method private doWriter: 'a. 'a lazy_t -> 'a = + fun action -> + debug_print ""; + self#doCritical (lazy ( + while readersCount > 0 do + Condition.wait noReaders mutex + done; + let res = Lazy.force action in + debug_print ""; + res + )) + + end + diff --git a/helm/http_getter/threadSafe.mli b/helm/http_getter/threadSafe.mli new file mode 100644 index 000000000..6f6d04716 --- /dev/null +++ b/helm/http_getter/threadSafe.mli @@ -0,0 +1,42 @@ +(* + * Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +class threadSafe: + object + + (** execute 'action' in mutual exclusion between all other threads *) + method private doCritical: 'a lazy_t -> 'a + + (** execute 'action' acting as a 'reader' i.e.: multiple readers can act + at the same time but no writer can act until no readers are acting *) + method private doReader: 'a lazy_t -> 'a + + (** execute 'action' acting as a 'writer' i.e.: when a writer is acting, + no readers or writer can act, beware that writers can starve *) + method private doWriter: 'a lazy_t -> 'a + + end +