From 384c369d4cbf5dd6cf1013902d3a218260400e73 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Wed, 11 Feb 2004 11:57:00 +0000 Subject: [PATCH] - getter revolution: split backend and frontend (this is the backend) - working remote part but not yet the local one (extra unneeded headers) --- helm/ocaml/getter/.depend | 38 +- helm/ocaml/getter/Makefile | 29 +- helm/ocaml/getter/configuration.ml | 122 ------ helm/ocaml/getter/configuration.mli | 41 -- helm/ocaml/getter/getter.ml | 95 ----- helm/ocaml/getter/getter.mli | 62 --- helm/ocaml/getter/http_getter.ml | 458 +++++++++++++++++++++ helm/ocaml/getter/http_getter.mli | 58 +++ helm/ocaml/getter/http_getter_cache.ml | 211 ++++++++++ helm/ocaml/getter/http_getter_cache.mli | 44 ++ helm/ocaml/getter/http_getter_common.ml | 138 +++++++ helm/ocaml/getter/http_getter_common.mli | 73 ++++ helm/ocaml/getter/http_getter_const.ml | 111 +++++ helm/ocaml/getter/http_getter_const.mli | 36 ++ helm/ocaml/getter/http_getter_debugger.ml | 58 +++ helm/ocaml/getter/http_getter_debugger.mli | 40 ++ helm/ocaml/getter/http_getter_env.ml | 159 +++++++ helm/ocaml/getter/http_getter_env.mli | 75 ++++ helm/ocaml/getter/http_getter_map.ml | 96 +++++ helm/ocaml/getter/http_getter_map.mli | 44 ++ helm/ocaml/getter/http_getter_misc.ml | 250 +++++++++++ helm/ocaml/getter/http_getter_misc.mli | 80 ++++ helm/ocaml/getter/http_getter_types.ml | 62 +++ 23 files changed, 2050 insertions(+), 330 deletions(-) delete mode 100644 helm/ocaml/getter/configuration.ml delete mode 100644 helm/ocaml/getter/configuration.mli delete mode 100644 helm/ocaml/getter/getter.ml delete mode 100644 helm/ocaml/getter/getter.mli create mode 100644 helm/ocaml/getter/http_getter.ml create mode 100644 helm/ocaml/getter/http_getter.mli create mode 100644 helm/ocaml/getter/http_getter_cache.ml create mode 100644 helm/ocaml/getter/http_getter_cache.mli create mode 100644 helm/ocaml/getter/http_getter_common.ml create mode 100644 helm/ocaml/getter/http_getter_common.mli create mode 100644 helm/ocaml/getter/http_getter_const.ml create mode 100644 helm/ocaml/getter/http_getter_const.mli create mode 100644 helm/ocaml/getter/http_getter_debugger.ml create mode 100644 helm/ocaml/getter/http_getter_debugger.mli create mode 100644 helm/ocaml/getter/http_getter_env.ml create mode 100644 helm/ocaml/getter/http_getter_env.mli create mode 100644 helm/ocaml/getter/http_getter_map.ml create mode 100644 helm/ocaml/getter/http_getter_map.mli create mode 100644 helm/ocaml/getter/http_getter_misc.ml create mode 100644 helm/ocaml/getter/http_getter_misc.mli create mode 100644 helm/ocaml/getter/http_getter_types.ml diff --git a/helm/ocaml/getter/.depend b/helm/ocaml/getter/.depend index c51f1a8e4..45bc04e7b 100644 --- a/helm/ocaml/getter/.depend +++ b/helm/ocaml/getter/.depend @@ -1,6 +1,36 @@ -configuration.cmo: configuration.cmi -configuration.cmx: configuration.cmi +http_getter_env.cmi: http_getter_types.cmo +http_getter_common.cmi: http_getter_types.cmo +http_getter_cache.cmi: http_getter_types.cmo +http_getter.cmi: http_getter_types.cmo clientHTTP.cmo: clientHTTP.cmi clientHTTP.cmx: clientHTTP.cmi -getter.cmo: clientHTTP.cmi configuration.cmi getter.cmi -getter.cmx: clientHTTP.cmx configuration.cmx getter.cmi +http_getter_debugger.cmo: http_getter_debugger.cmi +http_getter_debugger.cmx: http_getter_debugger.cmi +http_getter_misc.cmo: http_getter_debugger.cmi http_getter_misc.cmi +http_getter_misc.cmx: http_getter_debugger.cmx http_getter_misc.cmi +http_getter_const.cmo: http_getter_const.cmi +http_getter_const.cmx: http_getter_const.cmi +http_getter_env.cmo: http_getter_const.cmi http_getter_misc.cmi \ + http_getter_types.cmo http_getter_env.cmi +http_getter_env.cmx: http_getter_const.cmx http_getter_misc.cmx \ + http_getter_types.cmx http_getter_env.cmi +http_getter_common.cmo: http_getter_env.cmi http_getter_misc.cmi \ + http_getter_types.cmo http_getter_common.cmi +http_getter_common.cmx: http_getter_env.cmx http_getter_misc.cmx \ + http_getter_types.cmx http_getter_common.cmi +http_getter_map.cmo: http_getter_map.cmi +http_getter_map.cmx: http_getter_map.cmi +http_getter_cache.cmo: http_getter_common.cmi http_getter_debugger.cmi \ + http_getter_env.cmi http_getter_misc.cmi http_getter_types.cmo \ + http_getter_cache.cmi +http_getter_cache.cmx: http_getter_common.cmx http_getter_debugger.cmx \ + http_getter_env.cmx http_getter_misc.cmx http_getter_types.cmx \ + http_getter_cache.cmi +http_getter.cmo: http_getter_cache.cmi http_getter_common.cmi \ + http_getter_const.cmi http_getter_debugger.cmi http_getter_env.cmi \ + http_getter_map.cmi http_getter_misc.cmi http_getter_types.cmo \ + http_getter.cmi +http_getter.cmx: http_getter_cache.cmx http_getter_common.cmx \ + http_getter_const.cmx http_getter_debugger.cmx http_getter_env.cmx \ + http_getter_map.cmx http_getter_misc.cmx http_getter_types.cmx \ + http_getter.cmi diff --git a/helm/ocaml/getter/Makefile b/helm/ocaml/getter/Makefile index af3674f66..ac3abcce4 100644 --- a/helm/ocaml/getter/Makefile +++ b/helm/ocaml/getter/Makefile @@ -1,10 +1,27 @@ + PACKAGE = getter -REQUIRES = pxp helm-urimanager http -PREDICATES = -INTERFACE_FILES = configuration.mli clientHTTP.mli getter.mli -IMPLEMENTATION_FILES = $(INTERFACE_FILES:%.mli=%.ml) -EXTRA_OBJECTS_TO_INSTALL = -EXTRA_OBJECTS_TO_CLEAN = +REQUIRES = \ + http dbm pcre pxp shell zip \ + helm-thread helm-logger helm-urimanager helm-registry + +INTERFACE_FILES = \ + clientHTTP.mli \ + http_getter_debugger.mli \ + http_getter_misc.mli \ + http_getter_const.mli \ + http_getter_env.mli \ + http_getter_common.mli \ + http_getter_map.mli \ + http_getter_cache.mli \ + http_getter.mli + +IMPLEMENTATION_FILES = \ + http_getter_types.ml \ + $(INTERFACE_FILES:%.mli=%.ml) include ../Makefile.common + +test: getter.cma test.ml + $(OCAMLC) -linkpkg -o $@ $^ + diff --git a/helm/ocaml/getter/configuration.ml b/helm/ocaml/getter/configuration.ml deleted file mode 100644 index 1eb4ab65e..000000000 --- a/helm/ocaml/getter/configuration.ml +++ /dev/null @@ -1,122 +0,0 @@ -(******************************************************************************) -(* *) -(* PROJECT HELM *) -(* *) -(* Claudio Sacerdoti Coen *) -(* 28/12/2000 *) -(* *) -(* This is the parser that reads the configuration file of helm *) -(* *) -(******************************************************************************) - -exception MalformedDir of string - -(* this should be the only hard coded constant *) -let filename = - let prefix = - try - Sys.getenv "HELM_CONFIGURATION_DIR" - with - Not_found -> "/projects/helm/V7/phd/local/etc/helm" - in - if prefix.[(String.length prefix) - 1] = '/' then - raise (MalformedDir prefix) ; - prefix ^ "/configuration.xml";; - -exception Warnings;; - -class warner = - object - method warn w = - print_endline ("WARNING: " ^ w) ; - (raise Warnings : unit) - end -;; - -let xml_document () = - let error e = - prerr_endline ("Warning: configuration file not found, or incorrect: " ^ - Pxp_types.string_of_exn e) ; - None - in - let module Y = Pxp_yacc in - try - let config = {Y.default_config with Y.warner = new warner} in - Some (Y.parse_document_entity config (Y.from_file filename) Y.default_spec) - with - | (Pxp_types.Error _) as e -> error e - | (Pxp_types.At _) as e -> error e - | (Pxp_types.Validation_error _) as e -> error e - | (Pxp_types.WF_error _) as e -> error e - | (Pxp_types.Namespace_error _) as e -> error e - | (Pxp_types.Character_not_supported) as e -> error e -;; - -exception Impossible;; - -let vars = Hashtbl.create 14;; - -(* resolve tags and returns the string values of the variable tags *) -let rec resolve = - let module D = Pxp_document in - function - [] -> "" - | he::tl when he#node_type = D.T_element "value-of" -> - (match he#attribute "var" with - Pxp_types.Value var -> Hashtbl.find vars var - | _ -> raise Impossible - ) ^ resolve tl - | he::tl when he#node_type = D.T_data -> - he#data ^ resolve tl - | _ -> raise Impossible -;; - -(* we trust the xml file to be valid because of the validating xml parser *) -let _ = - match xml_document () with - None -> () - | Some d -> - List.iter - (function - n -> - match n#node_type with - Pxp_document.T_element var -> - Hashtbl.add vars var (resolve (n#sub_nodes)) - | _ -> raise Impossible - ) - (d#root#sub_nodes) -;; - -(* try to read a configuration variable, given its name into the - * configuration.xml file and its name into the shell environment. - * The shell variable, if present, has precedence over configuration.xml - *) -let read_configuration_var_env xml_name env_name = - try - try - Sys.getenv env_name - with - Not_found -> Hashtbl.find vars xml_name - with - Not_found -> - Printf.printf "Sorry, cannot find variable `%s', please check your configuration\n" xml_name ; - flush stdout ; - raise Not_found - -let read_configuration_var xml_name = - try - Hashtbl.find vars xml_name - with - Not_found -> - Printf.printf "Sorry, cannot find variable `%s', please check your configuration\n" xml_name ; - flush stdout ; - raise Not_found - -(* Zack: no longer used *) -(* let tmp_dir = read_configuration_var_env "tmp_dir" "HELM_TMP_DIR";; *) -let getter_url = read_configuration_var_env "getter_url" "HELM_GETTER_URL";; -let processor_url = read_configuration_var_env "processor_url" "HELM_PROCESSOR_URL";; -let annotations_dir = read_configuration_var_env "annotations_dir" "HELM_ANNOTATIONS_DIR" -let annotations_url = read_configuration_var_env "annotations_url" "HELM_ANNOTATIONS_URL" - -let _ = Hashtbl.clear vars;; diff --git a/helm/ocaml/getter/configuration.mli b/helm/ocaml/getter/configuration.mli deleted file mode 100644 index 20daaa411..000000000 --- a/helm/ocaml/getter/configuration.mli +++ /dev/null @@ -1,41 +0,0 @@ -(* Copyright (C) 2000, HELM Team. - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://cs.unibo.it/helm/. - *) - -(******************************************************************************) -(* *) -(* PROJECT HELM *) -(* *) -(* Claudio Sacerdoti Coen *) -(* 29/11/2000 *) -(* *) -(* *) -(******************************************************************************) - -(* Zack: no longer needed *) -(* val tmp_dir : string *) -val getter_url : string -val processor_url : string -val annotations_dir : string -val annotations_url : string diff --git a/helm/ocaml/getter/getter.ml b/helm/ocaml/getter/getter.ml deleted file mode 100644 index e53d0a3b4..000000000 --- a/helm/ocaml/getter/getter.ml +++ /dev/null @@ -1,95 +0,0 @@ -(* Copyright (C) 2000, HELM Team. - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://cs.unibo.it/helm/. - *) - -(******************************************************************************) -(* *) -(* PROJECT HELM *) -(* *) -(* Claudio Sacerdoti Coen *) -(* 24/01/2000 *) -(* *) -(******************************************************************************) - -let getter_url = ref Configuration.getter_url;; - -let update () = - (* deliver update request to http_getter *) - ClientHTTP.send (!getter_url ^ "update") -;; - -type format = - Normal - | GZipped -;; - -let getxml ?(format=Normal) ?(patchdtd=true) uri = - (* deliver getxml request to http_getter *) - ClientHTTP.get_and_save_to_tmp - (!getter_url ^ "getxml" ^ - "?uri=" ^ UriManager.string_of_uri uri ^ - "&format=" ^ (match format with Normal -> "normal" | GZipped -> "gzipped") ^ - "&patch_dtd=" ^ (match patchdtd with true -> "yes" | false -> "no") - ) -;; - -let register uri url = - (* deliver register request to http_getter *) - ClientHTTP.send - (!getter_url ^ "register" ^ - "?uri=" ^ (UriManager.string_of_uri uri) ^ - "&url=" ^ url) -;; - -exception Unresolved;; -exception UnexpectedGetterOutput;; - -(* resolve_result is needed because it is not possible to raise *) -(* an exception in a pxp ever-processing callback. Too bad. *) -type resolve_result = - Unknown - | Exception of exn - | Resolved of string - -let resolve uri = - (* deliver resolve request to http_getter *) - let doc = - ClientHTTP.get - (!getter_url ^ "resolve" ^ "?uri=" ^ (UriManager.string_of_uri uri)) - in - let res = ref Unknown in - Pxp_yacc.process_entity Pxp_yacc.default_config (`Entry_content []) - (Pxp_yacc.create_entity_manager ~is_document:true Pxp_yacc.default_config - (Pxp_yacc.from_string doc)) - (function - Pxp_yacc.E_start_tag ("url",["value",url],_) -> res := Resolved url - | Pxp_yacc.E_start_tag ("unresolved",[],_) -> res := Exception Unresolved - | Pxp_yacc.E_start_tag _ -> res := Exception UnexpectedGetterOutput - | _ -> () - ) ; - match !res with - Unknown -> raise UnexpectedGetterOutput - | Exception e -> raise e - | Resolved url -> url -;; diff --git a/helm/ocaml/getter/getter.mli b/helm/ocaml/getter/getter.mli deleted file mode 100644 index 3fbec8070..000000000 --- a/helm/ocaml/getter/getter.mli +++ /dev/null @@ -1,62 +0,0 @@ -(* Copyright (C) 2000, HELM Team. - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://cs.unibo.it/helm/. - *) - -(******************************************************************************) -(* *) -(* PROJECT HELM *) -(* *) -(* Claudio Sacerdoti Coen *) -(* 24/01/2000 *) -(* *) -(* *) -(******************************************************************************) - -(* THE URL OF THE HTTP GETTER *) -val getter_url : string ref - -(* SIMPLE BINDINGS TO THE HTTP GETTER *) -(* synchronize with the servers *) -val update : unit -> unit - -type format = - Normal - | GZipped -;; - -(* get the xml file *) -(* defaults: format = Normal and patchdtd = true *) -val getxml : ?format:format -> ?patchdtd:bool -> UriManager.uri -> string - -(* adds an (URI -> URL) entry in the map from URIs to URLs *) -val register : UriManager.uri -> string -> unit - -exception Unresolved -exception UnexpectedGetterOutput - -(* resolves an URI to its corresponding URL. *) -(* Unresolved is raised if there is no URL for the given URI. *) -(* UnexceptedGetterOutput is raised if the output of the real *) -(* getter has not the expected format. *) -val resolve: UriManager.uri -> string diff --git a/helm/ocaml/getter/http_getter.ml b/helm/ocaml/getter/http_getter.ml new file mode 100644 index 000000000..2883c383c --- /dev/null +++ b/helm/ocaml/getter/http_getter.ml @@ -0,0 +1,458 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Printf + +open Http_getter_common +open Http_getter_misc +open Http_getter_debugger +open Http_getter_types + +exception Not_implemented of string +exception UnexpectedGetterOutput + +(* resolve_result is needed because it is not possible to raise *) +(* an exception in a pxp ever-processing callback. Too bad. *) +type resolve_result = + | Unknown + | Exception of exn + | Resolved of string + +let not_implemented s = raise (Not_implemented ("Http_getter." ^ s)) + +let (index_line_sep_RE, index_sep_RE, trailing_types_RE, + heading_cic_RE, heading_theory_RE, heading_nuprl_RE, + heading_rdf_cic_RE, heading_rdf_theory_RE) = + (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n", + Pcre.regexp "\\.types$", + Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:", + Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") + + (* global maps, shared by all threads *) + +let cic_map = + lazy (new Http_getter_map.map (Lazy.force Http_getter_env.cic_dbm)) +let nuprl_map = + lazy (new Http_getter_map.map (Lazy.force Http_getter_env.nuprl_dbm)) +let rdf_map = + lazy (new Http_getter_map.map (Lazy.force Http_getter_env.rdf_dbm)) +let xsl_map = + lazy (new Http_getter_map.map (Lazy.force Http_getter_env.xsl_dbm)) + +let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] +let close_maps () = List.iter (fun m -> (Lazy.force m) # close) maps +let clear_maps () = List.iter (fun m -> (Lazy.force m) # clear) maps +let sync_maps () = List.iter (fun m -> (Lazy.force m) # sync) maps + +let map_of_uri = function + | uri when is_cic_uri uri -> Lazy.force cic_map + | uri when is_nuprl_uri uri -> Lazy.force nuprl_map + | uri when is_rdf_uri uri -> Lazy.force rdf_map + | uri when is_xsl_uri uri -> Lazy.force xsl_map + | uri -> raise (Unresolvable_URI uri) + +let update_from_server logmsg server_url = (* use global maps *) + debug_print ("Updating information from " ^ server_url); + let xml_url_of_uri = function + (* TODO missing sanity checks on server_url, e.g. it can contains $1 *) + | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) -> + Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) -> + Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) -> + Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri + | uri -> raise (Invalid_URI uri) + in + let rdf_url_of_uri = function (* TODO as above *) + | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) -> + Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) -> + Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri + | uri -> raise (Invalid_URI uri) + in + let log = ref (`T ("Processing server: " ^ server_url) :: logmsg) in + let (xml_index, rdf_index, xsl_index) = + (* TODO keeps index in memory, is better to keep them on temp files? *) + (http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xml_index)), + http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.rdf_index)), + http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xsl_index))) + in + if (xml_index = None && rdf_index = None && xsl_index = None) then + debug_print (sprintf "Warning: useless server %s" server_url); + (match xml_index with + | Some xml_index -> + (log := `T "Updating XML db ...
" :: !log; + List.iter + (function + | l when is_blank_line l -> () (* skip blank and commented lines *) + | l -> + (try + (match Pcre.split ~rex:index_line_sep_RE l with + | [uri; "gz"] -> + assert (is_cic_uri uri || is_nuprl_uri uri) ; + (map_of_uri uri)#replace + uri ((xml_url_of_uri uri) ^ ".xml.gz") + | [uri] -> + assert (is_cic_uri uri || is_nuprl_uri uri) ; + (map_of_uri uri)#replace + uri ((xml_url_of_uri uri) ^ ".xml") + | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log) + with Invalid_URI uri -> + log := `T ("Ignoring invalid XML URI: '" ^ l) :: !log)) + (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *) + log := `T "All done" :: !log) + | None -> ()); + (match rdf_index with + | Some rdf_index -> + (log := `T "Updating RDF db ..." :: !log; + List.iter + (fun l -> + try + (match Pcre.split ~rex:index_line_sep_RE l with + | [uri; "gz"] -> + (Lazy.force rdf_map) # replace uri + ((rdf_url_of_uri uri) ^ ".xml.gz") + | [uri] -> + (Lazy.force rdf_map) # replace uri + ((rdf_url_of_uri uri) ^ ".xml") + | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log) + with Invalid_URI uri -> + log := `T ("Ignoring invalid RDF URI: '" ^ l) :: !log) + (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *) + log := `T "All done" :: !log) + | None -> ()); + (match xsl_index with + | Some xsl_index -> + (log := `T "Updating XSLT db ..." :: !log; + List.iter + (fun l -> (Lazy.force xsl_map) # replace l (server_url ^ "/" ^ l)) + (Pcre.split ~rex:index_sep_RE xsl_index); + log := `T "All done" :: !log) + | None -> ()); + debug_print "done with this server"; + !log + +let update_from_all_servers () = (* use global maps *) + clear_maps (); + let log = + List.fold_left + update_from_server + [] (* initial logmsg: empty *) + (* reverse order: 1st server is the most important one *) + (List.map snd (List.rev (Lazy.force Http_getter_env.servers))) + in + sync_maps (); + `Msg (`L (List.rev log)) + +let update_from_one_server server_url = + let log = update_from_server [] server_url in + `Msg (`L (List.rev log)) + +let temp_file_of_uri uri = + let flat_string s s' c = + let cs = String.copy s in + for i = 0 to (String.length s) - 1 do + if String.contains s' s.[i] then cs.[i] <- c + done; + cs + in + let user = try Unix.getlogin () with _ -> "" in + Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') "" + + (* should we use a remote getter or not *) +let remote () = + try + Helm_registry.get "getter.mode" = "remote" + with Helm_registry.Key_not_found _ -> false +let getter_url () = Helm_registry.get "getter.url" + +(* Remote interface: getter methods implemented using a remote getter *) + + (* *) +let getxslt_remote ~patch_dtd uri = not_implemented "getxslt_remote" +let getdtd_remote ~patch_dtd uri = not_implemented "getdtd_remote" +let clean_cache_remote () = not_implemented "clean_cache_remote" +let list_servers_remote () = not_implemented "list_servers_remote" +let add_server_remote ~position name = not_implemented "add_server_remote" +let remove_server_remote position = not_implemented "remove_server_remote" +let getalluris_remote () = not_implemented "getalluris_remote" +let getallrdfuris_remote () = not_implemented "getallrdfuris_remote" +let ls_remote lsuri = not_implemented "ls_remote" + (* *) + +let resolve_remote uri = + (* deliver resolve request to http_getter *) + let doc = ClientHTTP.get (sprintf "%sresolve?uri=%s" (getter_url ()) uri) in + let res = ref Unknown in + Pxp_yacc.process_entity Pxp_yacc.default_config (`Entry_content []) + (Pxp_yacc.create_entity_manager ~is_document:true Pxp_yacc.default_config + (Pxp_yacc.from_string doc)) + (function + | Pxp_yacc.E_start_tag ("url",["value",url],_) -> res := Resolved url + | Pxp_yacc.E_start_tag ("unresolved",[],_) -> + res := Exception (Unresolvable_URI uri) + | Pxp_yacc.E_start_tag _ -> res := Exception UnexpectedGetterOutput + | _ -> ()); + match !res with + | Unknown -> raise UnexpectedGetterOutput + | Exception e -> raise e + | Resolved url -> url + +let register_remote ~uri ~url = + ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url) + +let register_remote ~uri ~url = + ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url) + +let update_remote () = + let answer = ClientHTTP.get (getter_url () ^ "update") in + `Msg (`T answer) + +let getxml_remote ~format ~patch_dtd uri = + ClientHTTP.get_and_save_to_tmp + (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" + (getter_url ()) uri + (match format with Enc_normal -> "normal" | Enc_gzipped -> "gzipped") + (match patch_dtd with true -> "yes" | false -> "no")) + +(* API *) + +let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ()) + +let resolve uri = + if remote () then + resolve_remote uri + else + try + (map_of_uri uri)#resolve uri + with Http_getter_map.Key_not_found _ -> raise (Unresolvable_URI uri) + + (* Warning: this fail if uri is already registered *) +let register ~uri ~url = + if remote () then + register_remote ~uri ~url + else + (map_of_uri uri)#add uri url + +let update () = + if remote () then + update_remote () + else + update_from_all_servers () + +let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri = + if remote () then + getxml_remote ~format ~patch_dtd uri + else begin + let url = resolve uri in + let (fname, outchan) = temp_file_of_uri uri in + Http_getter_cache.respond_xml ~uri ~url ~enc:format ~patch:patch_dtd + outchan; + close_out outchan; + fname + end + +let getxslt ?(patch_dtd = true) uri = + if remote () then + getxslt_remote ~patch_dtd uri + else begin + let url = resolve uri in + let (fname, outchan) = temp_file_of_uri uri in + Http_getter_cache.respond_xsl ~url ~patch:patch_dtd outchan; + close_out outchan; + fname + end + +let getdtd ?(patch_dtd = true) uri = + if remote () then + getdtd_remote ~patch_dtd uri + else begin + let url = Lazy.force Http_getter_env.dtd_dir ^ "/" ^ uri in + let (fname, outchan) = temp_file_of_uri uri in + Http_getter_cache.respond_dtd ~url ~patch:patch_dtd outchan; + close_out outchan; + fname + end + +let clean_cache () = + if remote () then + clean_cache_remote () + else + Http_getter_cache.clean () + +let list_servers () = + if remote () then + list_servers_remote () + else + Lazy.force Http_getter_env.servers + +let add_server ?(position = 0) name = + if remote () then + add_server_remote ~position name + else begin + if position = 0 then begin + Http_getter_env.add_server ~position:0 name; + update_from_one_server name (* quick update (new server only) *) + end else if position > 0 then begin + Http_getter_env.add_server ~position name; + update () + end else (* already checked bt parse_position *) + assert false + end + +let remove_server position = + if remote () then + remove_server_remote () + else begin + let server_name = + try + List.assoc position (Lazy.force Http_getter_env.servers) + with Not_found -> + raise (Invalid_argument (sprintf "no server with position %d" position)) + in + Http_getter_env.remove_server position; + update () + end + +let return_uris map filter = + let uris = ref [] in + map#iter (fun uri _ -> if filter uri then uris := uri :: !uris); + List.rev !uris + +let getalluris () = + if remote () then + getalluris_remote () + else + let filter uri = + (Pcre.pmatch ~rex:heading_cic_RE uri) && + not (Pcre.pmatch ~rex:trailing_types_RE uri) + in + return_uris (Lazy.force cic_map) filter + +let getallrdfuris classs = + if remote () then + getallrdfuris_remote () + else + let filter = + let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in + match classs with + | `Forward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "forward") uri) + | `Backward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "backward") uri) + in + return_uris (Lazy.force rdf_map) filter + +let ls = + let (++) (oldann, oldtypes, oldbody, oldtree) + (newann, newtypes, newbody, newtree) = + ((if newann > oldann then newann else oldann), + (if newtypes > oldtypes then newtypes else oldtypes), + (if newbody > oldbody then newbody else oldbody), + (if newtree > oldtree then newtree else oldtree)) + in + let basepart_RE = + Pcre.regexp + "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" + in + let (types_RE, types_ann_RE, body_RE, body_ann_RE, + proof_tree_RE, proof_tree_ann_RE) = + (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", + Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", + Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$") + in + let (slash_RE, til_slash_RE, no_slashes_RE) = + (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") + in + fun lsuri -> + if remote () then + ls_remote lsuri + else begin + let pat = + "^" ^ + (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) + in + let (dir_RE, obj_RE) = + (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)")) + in + let dirs = ref StringSet.empty in + let objs = Hashtbl.create 17 in + let store_dir d = + dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs + in + let store_obj o = + let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in + let no_flags = false, No, No, No in + let oldflags = + try + Hashtbl.find objs basepart + with Not_found -> (* no ann, no types, no body, no proof tree *) + no_flags + in + let newflags = + match o with + | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) + | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) + | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) + | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) + | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) + | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) + | s -> no_flags + in + Hashtbl.replace objs basepart (oldflags ++ newflags) + in + (Lazy.force cic_map) # iter + (* BLEARGH Dbm module lacks support for fold-like functions *) + (fun key _ -> + match key with + | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *) + let localpart = Pcre.replace ~rex:dir_RE uri in + if Pcre.pmatch ~rex:no_slashes_RE localpart then + store_obj localpart + else + store_dir localpart + | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *) + store_obj (Pcre.replace ~rex:til_slash_RE uri) + | uri -> () (* miss *)); + let ls_items = ref [] in + StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; + Http_getter_misc.hashtbl_sorted_iter + (fun uri (annflag, typesflag, bodyflag, treeflag) -> + ls_items := + Ls_object { + uri = uri; ann = annflag; + types = typesflag; body = typesflag; proof_tree = treeflag + } :: !ls_items) + objs; + List.rev !ls_items + end + + (* Shorthands from now on *) + +let getxml' uri = getxml (UriManager.string_of_uri uri) +let resolve' uri = resolve (UriManager.string_of_uri uri) +let register' uri url = register ~uri:(UriManager.string_of_uri uri) ~url + diff --git a/helm/ocaml/getter/http_getter.mli b/helm/ocaml/getter/http_getter.mli new file mode 100644 index 000000000..e6a41117f --- /dev/null +++ b/helm/ocaml/getter/http_getter.mli @@ -0,0 +1,58 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Http_getter_types + + (** {2 Getter Web Service interface as API *) + +val help: unit -> string +val resolve: string -> string (* uri -> url *) +val register: uri:string -> url:string -> unit +val update: unit -> HelmLogger.html_msg +val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string +val getxslt : ?patch_dtd:bool -> string -> string +val getdtd : ?patch_dtd:bool -> string -> string +val clean_cache: unit -> unit +val list_servers: unit -> (int * string) list +val add_server: ?position:int -> string -> HelmLogger.html_msg +val remove_server: int -> HelmLogger.html_msg +val getalluris: unit -> string list +val getallrdfuris: [ `Forward | `Backward ] -> string list +val ls: xml_uri -> ls_item list + + (** {2 Shorthands} *) + +val getxml' : UriManager.uri -> string +val resolve' : UriManager.uri -> string +val register' : UriManager.uri -> string -> unit + + (** {2 Misc} *) + +val close_maps: unit -> unit +val update_from_one_server: string -> HelmLogger.html_msg + diff --git a/helm/ocaml/getter/http_getter_cache.ml b/helm/ocaml/getter/http_getter_cache.ml new file mode 100644 index 000000000..a3f91220e --- /dev/null +++ b/helm/ocaml/getter/http_getter_cache.ml @@ -0,0 +1,211 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +(* TODO cache expires control!!! *) +(* TODO uwobo loop: + if two large proof (already in cache) are requested at the same time by two + clients, uwobo (java implementation, not yet tested with the OCaml one) + starts looping sending output to one of the client *) + +open Http_getter_common;; +open Http_getter_debugger;; +open Http_getter_misc;; +open Http_getter_types;; +open Printf;; + + (* expose ThreadSafe.threadSafe methods *) +class threadSafe = + object + inherit ThreadSafe.threadSafe + method virtual doCritical : 'a. 'a lazy_t -> 'a + method virtual doReader : 'a. 'a lazy_t -> 'a + method virtual doWriter : 'a. 'a lazy_t -> 'a + end +;; +let threadSafe = new threadSafe ;; + +let resource_type_of_url = function + | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped + | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal + | url -> raise (Invalid_URL url) +let extension_of_resource_type = function + | Enc_normal -> "xml" + | Enc_gzipped -> "xml.gz" + + (* basename = resource name without trailing ".gz", if any *) +let is_in_cache basename = + Sys.file_exists + (match Lazy.force Http_getter_env.cache_mode with + | Enc_normal -> basename + | Enc_gzipped -> basename ^ ".gz") + +let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = + let resource_type = resource_type_of_url url in + let extension = extension_of_resource_type resource_type in + let downloadname = + match uri_of_string uri with (* parse uri *) + | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) -> + (* assumption: baseuri starts with "/" *) + sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension + | Nuprl_uri baseuri -> + (* assumption: baseuri starts with "/" *) + sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri extension + | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri)) + | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) -> + let escaped_prefix = + (Pcre.replace ~pat:"/" ~templ:"_" + (Pcre.replace ~pat:"_" ~templ:"__" + (prefix ^ + (match qbaseuri with + | Cic _ -> "//cic:" + | Theory _ -> "//theory:")))) + in + sprintf "%s/%s%s.%s" + (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension + in + let patch_fun = + if patch then Http_getter_common.patch_xml else (fun x -> x) + in + let basename = Pcre.replace ~pat:"\\.gz$" downloadname in + let contype = "text/xml" in + (* Fill cache if needed and return a short circuit file. + Short circuit is needed in situations like: + resource type = normal, cache type = gzipped, required encoding = normal + we would like to avoid normal -> gzipped -> normal conversions. To avoid + this tmp_short_circuit is used to remember the name of the intermediate + file name *) + let fill_cache () = + threadSafe#doWriter (lazy( + if not (is_in_cache basename) then begin (* cache MISS *) + debug_print "Cache MISS :-("; + mkdir ~parents:true (Filename.dirname downloadname); + match (resource_type, Lazy.force Http_getter_env.cache_mode) with + | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + wget ~output:downloadname url; + None + | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) + let tmp = tempfile () in + wget ~output:tmp url; + gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) + if enc = Enc_normal then (* user wants normal: don't delete it! *) + Some (tmp, enc) + else begin + Sys.remove tmp; + None + end + | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) + let tmp = tempfile () in + wget ~output:tmp url; + gunzip ~output:basename ~keep:true tmp; (* fill cache *) + if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *) + Some (tmp, enc) + else begin + Sys.remove tmp; + None + end + end else begin + debug_print "Cache HIT :-)"; + None + end + )) in + let tmp_short_circuit = fill_cache () in + threadSafe#doReader (lazy( + assert (is_in_cache basename); + match (enc, Lazy.force Http_getter_env.cache_mode) with + | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + (* resource in cache is already in the required format *) + (match enc with + | Enc_normal -> + debug_print "No format mangling required (encoding = normal)"; + return_file ~fname:basename ~contype ~patch_fun outchan + | Enc_gzipped -> + debug_print "No format mangling required (encoding = gzipped)"; + return_file + ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" + ~patch_fun ~gunzip:true + outchan) + | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> + (match tmp_short_circuit with + | None -> (* no short circuit possible, use cache *) + debug_print "No short circuit available, use cache"; + let tmp = tempfile () in + (match enc with + | Enc_normal -> + (* required format is normal, cached version is gzipped *) + gunzip (* gunzip to tmp *) + ~output:tmp ~keep:true (basename ^ ".gz"); + return_file ~fname:tmp ~contype ~patch_fun outchan; + | Enc_gzipped -> + (* required format is gzipped, cached version is normal *) + gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) + return_file + ~fname:tmp ~contype ~contenc:"x-gzip" + ~patch_fun ~gunzip:true + outchan); + Sys.remove tmp + | Some (fname, Enc_normal) -> (* short circuit available, use it! *) + debug_print "Using short circuit (encoding = normal)"; + return_file ~fname ~contype ~patch_fun outchan; + Sys.remove fname + | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) + debug_print "Using short circuit (encoding = gzipped)"; + return_file + ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan; + Sys.remove fname) + )) +;; + + (* TODO enc is not yet supported *) +let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = + let patch_fun = + if patch then Http_getter_common.patch_xsl else (fun x -> x) + in + let fname = tempfile () in + wget ~output:fname url; + return_file ~fname ~contype:"text/xml" ~patch_fun outchan; + Sys.remove fname +;; + + (* TODO enc is not yet supported *) +let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan = + let patch_fun = + if patch then Http_getter_common.patch_dtd else (fun x -> x) + in + if Sys.file_exists url then + (* TODO check this: old getter here used text/xml *) + return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan + else + return_html_error ("Can't find DTD: " ^ url) outchan +;; + +let clean () = + let module E = Http_getter_env in + List.iter + (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*"))) + [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ] +;; diff --git a/helm/ocaml/getter/http_getter_cache.mli b/helm/ocaml/getter/http_getter_cache.mli new file mode 100644 index 000000000..11211288a --- /dev/null +++ b/helm/ocaml/getter/http_getter_cache.mli @@ -0,0 +1,44 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Http_getter_types;; + +val respond_xml: + ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> out_channel -> + unit + +val respond_xsl: + ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> + unit + +val respond_dtd: + ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> + unit + +val clean: unit -> unit + diff --git a/helm/ocaml/getter/http_getter_common.ml b/helm/ocaml/getter/http_getter_common.ml new file mode 100644 index 000000000..6ecc75f81 --- /dev/null +++ b/helm/ocaml/getter/http_getter_common.ml @@ -0,0 +1,138 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Http_getter_types;; +open Printf;; + +let string_of_ls_flag = function No -> "NO" | Yes -> "YES" | Ann -> "ANN" +let string_of_encoding = function + | Enc_normal -> "Normal" + | Enc_gzipped -> "GZipped" + +let is_cic_obj_uri uri = Pcre.pmatch ~pat:"^cic:" uri +let is_theory_uri uri = Pcre.pmatch ~pat:"^theory:" uri +let is_cic_uri uri = is_cic_obj_uri uri || is_theory_uri uri +let is_nuprl_uri uri = Pcre.pmatch ~pat:"^nuprl:" uri +let is_rdf_uri uri = Pcre.pmatch ~pat:"^helm:rdf(.*):(.*)//(.*)" uri +let is_xsl_uri uri = Pcre.pmatch ~pat:"^\\w+\\.xsl" uri + +let rec uri_of_string = function + | uri when is_rdf_uri uri -> + (match Pcre.split ~pat:"//" uri with + | [ prefix; uri ] -> + let rest = + match uri_of_string uri with + | Cic_uri xmluri -> xmluri + | _ -> raise (Invalid_URI uri) + in + Rdf_uri (prefix, rest) + | _ -> raise (Invalid_URI uri)) + | uri when is_cic_uri uri -> Cic_uri (Cic (Pcre.replace ~pat:"^cic:" uri)) + | uri when is_nuprl_uri uri -> Nuprl_uri (Pcre.replace ~pat:"^nuprl:" uri) + | uri when is_theory_uri uri -> + Cic_uri (Theory (Pcre.replace ~pat:"^theory:" uri)) + | uri -> raise (Invalid_URI uri) + +let patch_xml line = + Pcre.replace + ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" + (Lazy.force Http_getter_env.dtd_base_url)) + ~templ:(sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri=" + (Lazy.force Http_getter_env.my_own_url)) + line +let patch_xsl line = + let mk_patch_fun tag line = + Pcre.replace + ~pat:(sprintf "%s\\s+href=\"" tag) + ~templ:(sprintf "%s href=\"%s/getxslt?uri=" + tag (Lazy.force Http_getter_env.my_own_url)) + line + in + let (patch_import, patch_include) = + (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include") + in + patch_include (patch_import line) +let patch_dtd line = + Pcre.replace + ~pat:"ENTITY (.*) SYSTEM\\s+\"" + ~templ:(sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri=" + (Lazy.force Http_getter_env.my_own_url)) + line + +let pp_error s = + sprintf "Http Getter error: %s" s +let pp_internal_error s = + sprintf "Http Getter Internal error: %s" s +let pp_msg s = sprintf "%s" s +let null_pp s = s + +let mk_return_fun pp_fun contype msg outchan = + Http_daemon.respond + ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan + +let return_html_error = mk_return_fun pp_error "text/html" +let return_html_internal_error = mk_return_fun pp_internal_error "text/html" +let return_html_msg = mk_return_fun pp_msg "text/html" +let return_html_raw = mk_return_fun null_pp "text/html" +let return_xml_raw = mk_return_fun null_pp "text/xml" +let return_file + ~fname ?contype ?contenc ?(patch_fun = fun x -> x) ?(gunzip = false) outchan + = + let headers = + match (contype, contenc) with + | (Some t, Some e) -> ["Content-Encoding", e; "Content-Type", t] + | (Some t, None) -> ["Content-Type" , t] + | (None, Some e) -> ["Content-Encoding", e] + | (None, None) -> [] + in + Http_daemon.send_basic_headers ~code:200 outchan; + Http_daemon.send_headers headers outchan; + Http_daemon.send_CRLF outchan; + if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to + it, compress the result and sent it to client *) + let (tmp1, tmp2) = + (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ()) + in + Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname; (* gunzip to tmp1 *) + let new_file = open_out tmp2 in + Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *) + (fun line -> output_string new_file (patch_fun line ^ "\n")) + tmp1; + close_out new_file; + Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *) + Http_getter_misc.iter_file (* send tmp1 to client as is*) + (fun line -> output_string outchan (line ^ "\n")) + tmp1; + Sys.remove tmp1 (* rm tmp1 *) + end else (* no need to gunzip, apply patch_fun directly to file *) + Http_getter_misc.iter_file + (fun line -> output_string outchan (patch_fun line ^ "\n")) + fname +;; +let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan + diff --git a/helm/ocaml/getter/http_getter_common.mli b/helm/ocaml/getter/http_getter_common.mli new file mode 100644 index 000000000..f4ecb3dc8 --- /dev/null +++ b/helm/ocaml/getter/http_getter_common.mli @@ -0,0 +1,73 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Http_getter_types;; + +val string_of_ls_flag: ls_flag -> string +val string_of_encoding: encoding -> string + +val is_cic_uri: string -> bool +val is_nuprl_uri: string -> bool +val is_rdf_uri: string -> bool +val is_xsl_uri: string -> bool + +val uri_of_string: string -> uri + +val patch_xml : string -> string +val patch_xsl : string -> string +val patch_dtd : string -> string + +val return_html_error: string -> out_channel -> unit +val return_html_internal_error: string -> out_channel -> unit + (** return an HTML HTTP response from the given string, embedding it in an + "H1" element of an HTML page; content-type is set to text/html *) +val return_html_msg: string -> out_channel -> unit + (** return an HTTP response using given string as content; content-type is set + to text/html *) +val return_html_raw: string -> out_channel -> unit + (** return an HTTP response using given string as content; content-type is set + to text/xml *) +val return_xml_raw: string -> out_channel -> unit + (** return a bad request http response *) +val return_400: string -> out_channel -> unit + (** + @param fname name of the file to be sent + @param contype Content-Type header value + @param contenc Content-Enconding header value + @param patch_fun function used to patch file contents + @param gunzip is meaningful only if a patch function is provided. If gunzip + is true patch_fun is applied to the uncompressed version of the file. The file + is then compressed again and send to client + @param outchan output channel over which sent file fname *) +val return_file: + fname:string -> + ?contype:string -> ?contenc:string -> + ?patch_fun:(string -> string) -> ?gunzip:bool -> + out_channel -> + unit + diff --git a/helm/ocaml/getter/http_getter_const.ml b/helm/ocaml/getter/http_getter_const.ml new file mode 100644 index 000000000..a4eac83e5 --- /dev/null +++ b/helm/ocaml/getter/http_getter_const.ml @@ -0,0 +1,111 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Printf;; + +let version = "0.3.0" +let conffile = "http_getter.conf.xml" + + (* TODO provide a better usage string *) +let usage_string configuration = + sprintf +" + + + HTTP Getter's help message + + +

HTTP Getter, version %s

+

Usage information

+

+ Usage: http://hostname:getterport/command +

+

+ Available commands: +

+

+ help
+ display this help message +

+

+ getxml?uri=URI[&format=(normal|gz)][&patch_dtd=(yes|no)]
+

+

+ register?uri=URI&url=URL
+

+

+ resolve?uri=URI
+

+

+ getdtd?uri=URI[&patch_dtd=(yes|no)]
+

+

+ getxslt?uri=URI[&patch_dtd=(yes|no)]
+

+

+ list_servers
+

+

+ add_server?url=URL&position=POSITION
+

+

+ remove_server?position=POSITION
+

+

+ update
+

+

+ clean_cache
+

+

+ getalluris
+

+

+ getallrdfuris
+

+

+ ls?baseuri=URI&format=(txt|xml)
+

+

+ getempty
+

+

Current configuration

+
%s
+ + +" + version configuration + +let empty_xml = +" + +]> + +" + diff --git a/helm/ocaml/getter/http_getter_const.mli b/helm/ocaml/getter/http_getter_const.mli new file mode 100644 index 000000000..894ccd645 --- /dev/null +++ b/helm/ocaml/getter/http_getter_const.mli @@ -0,0 +1,36 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +val version: string +val conffile: string +val empty_xml: string + + (** @return an HTML usage string including configuration information passed as + input parameter *) +val usage_string: string -> string + diff --git a/helm/ocaml/getter/http_getter_debugger.ml b/helm/ocaml/getter/http_getter_debugger.ml new file mode 100644 index 000000000..3f9afd78c --- /dev/null +++ b/helm/ocaml/getter/http_getter_debugger.ml @@ -0,0 +1,58 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +let debug = ref true + +(* invariant: if logfile is set, then logchan is set too *) +let logfile = ref None +let logchan = ref None + +let set_logfile f = + (match !logchan with None -> () | Some oc -> close_out oc); + match f with + | Some f -> + logfile := Some f; + logchan := Some (open_out f) + | None -> + logfile := None; + logchan := None + +let get_logfile () = !logfile + +let close_logfile () = set_logfile None + +let debug_print s = + let msg = "[HTTP-Getter] " ^ s in + if !debug then + match (!logfile, !logchan) with + | None, _ -> prerr_endline msg + | Some fname, Some oc -> + output_string oc msg; + flush oc + | Some _, None -> assert false + diff --git a/helm/ocaml/getter/http_getter_debugger.mli b/helm/ocaml/getter/http_getter_debugger.mli new file mode 100644 index 000000000..461e2a1a7 --- /dev/null +++ b/helm/ocaml/getter/http_getter_debugger.mli @@ -0,0 +1,40 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + + (** enable/disable debugging messages *) +val debug: bool ref + + (** output a debugging message *) +val debug_print: string -> unit + + (** if set to Some fname, fname will be used as a logfile, otherwise stderr + * will be used *) +val get_logfile: unit -> string option +val set_logfile: string option -> unit +val close_logfile: unit -> unit + diff --git a/helm/ocaml/getter/http_getter_env.ml b/helm/ocaml/getter/http_getter_env.ml new file mode 100644 index 000000000..39e83a965 --- /dev/null +++ b/helm/ocaml/getter/http_getter_env.ml @@ -0,0 +1,159 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Printf +open Pxp_document +open Pxp_types +open Pxp_yacc + +open Http_getter_types + +let version = Http_getter_const.version + +let servers_file = lazy (Helm_registry.get "getter.servers_file") +let cic_dbm = lazy (Helm_registry.get "getter.cic_dbm") +let nuprl_dbm = lazy (Helm_registry.get "getter.nuprl_dbm") +let rdf_dbm = lazy (Helm_registry.get "getter.rdf_dbm") +let xsl_dbm = lazy (Helm_registry.get "getter.xsl_dbm") +let xml_index = lazy (Helm_registry.get "getter.xml_indexname") +let rdf_index = lazy (Helm_registry.get "getter.rdf_indexname") +let xsl_index = lazy (Helm_registry.get "getter.xsl_indexname") +let cic_dir = lazy (Helm_registry.get "getter.cic_dir") +let nuprl_dir = lazy (Helm_registry.get "getter.nuprl_dir") +let rdf_dir = lazy (Helm_registry.get "getter.rdf_dir") +let dtd_dir = lazy (Helm_registry.get "getter.dtd_dir") +let dtd_base_url = lazy (Helm_registry.get "getter.dtd_base_url") +let port = lazy (Helm_registry.get_int "getter.port") + +let _servers = ref None + +let servers = + lazy + (match !_servers with + | None -> failwith "Getter not yet initialized: servers not available" + | Some servers -> servers) + +let load_servers () = + let pos = ref (-1) in + List.rev (Http_getter_misc.fold_file + (fun line servers -> + if Http_getter_misc.is_blank_line line then + servers + else + (incr pos; (!pos, line) :: servers)) + [] + (Lazy.force servers_file)) + +let reload_servers () = _servers := Some (load_servers ()) + +let save_servers () = + let oc = open_out (Lazy.force servers_file) in + List.iter (fun (_,server) -> output_string oc (server ^ "\n")) + (Lazy.force servers); + close_out oc + +let host = + lazy + (let buf = Buffer.create 20 in + Shell.call ~stdout:(Shell.to_buffer buf) [Shell.cmd "hostname" ["-f"]]; + Pcre.replace ~pat:"\n+$" (Buffer.contents buf)) + +let my_own_url = + lazy + (let (host, port) = (Lazy.force host, Lazy.force port) in + sprintf "http://%s%s" (* without trailing '/' *) + host (if port = 80 then "" else (sprintf ":%d" port))) + +let cache_mode = + lazy + (match String.lowercase (Helm_registry.get "getter.cache_mode") with + | "normal" -> Enc_normal + | "gz" -> Enc_gzipped + | mode -> failwith ("Invalid cache mode: " ^ mode)) + +let reload () = reload_servers () + +let env_to_string () = + sprintf +"HTTP Getter %s (the OCaml one!) + +cic_dbm:\t%s +nuprl_dbm:\t%s +rdf_dbm:\t%s +xsl_dbm:\t%s +xml_index:\t%s +rdf_index:\t%s +xsl_index:\t%s +cic_dir:\t%s +nuprl_dir:\t%s +rdf_dir:\t%s +dtd_dir:\t%s +servers_file:\t%s +host:\t\t%s +port:\t\t%d +my_own_url:\t%s +dtd_base_url:\t%s +cache_mode:\t%s +servers: +\t%s +" + version (Lazy.force cic_dbm) (Lazy.force nuprl_dbm) (Lazy.force rdf_dbm) + (Lazy.force xsl_dbm) (Lazy.force xml_index) + (Lazy.force rdf_index) (Lazy.force xsl_index) (Lazy.force cic_dir) + (Lazy.force nuprl_dir) (Lazy.force rdf_dir) + (Lazy.force dtd_dir) (Lazy.force servers_file) (Lazy.force host) + (Lazy.force port) (Lazy.force my_own_url) + (Lazy.force dtd_base_url) + (match Lazy.force cache_mode with Enc_normal -> "Normal" | Enc_gzipped -> "GZipped") + (String.concat "\n\t" (* (position * server) list *) + (List.map (fun (pos, server) -> sprintf "%3d: %s" pos server) + (Lazy.force servers))) + +let add_server ?position url = + let new_servers = + let servers = Lazy.force servers in + match position with + | None -> servers @ [-1, url]; + | Some p when p > 0 -> + let rec add_after pos = function + | [] -> [-1, url] + | hd :: tl when p = 1 -> hd :: (-1, url) :: tl + | hd :: tl (* when p > 1 *) -> hd :: (add_after (pos - 1) tl) + in + add_after p servers + | Some _ -> assert false + in + _servers := Some new_servers; + save_servers (); + reload_servers () + +let remove_server position = + _servers := Some (List.remove_assoc position (Lazy.force servers)); + save_servers (); + reload_servers () + diff --git a/helm/ocaml/getter/http_getter_env.mli b/helm/ocaml/getter/http_getter_env.mli new file mode 100644 index 000000000..cb1731792 --- /dev/null +++ b/helm/ocaml/getter/http_getter_env.mli @@ -0,0 +1,75 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Http_getter_types;; + + (* {2 general information} *) + +val version : string (* getter version *) + + (* {2 environment gathered data} *) + +val cic_dbm : string lazy_t (* XML map DBM file for CIC *) +val nuprl_dbm : string lazy_t (* XML map DBM file for NuPRL *) +val rdf_dbm : string lazy_t (* RDF map DBM file *) +val xsl_dbm : string lazy_t (* XSL map DBM file *) +val xml_index : string lazy_t (* XMLs' index *) +val rdf_index : string lazy_t (* RDFs' index *) +val xsl_index : string lazy_t (* XSLTs' index *) +val cic_dir : string lazy_t (* XMLs' directory for CIC*) +val nuprl_dir : string lazy_t (* XMLs' directory for NuPRL*) +val rdf_dir : string lazy_t (* RDFs' directory *) +val dtd_dir : string lazy_t (* DTDs' root directory *) +val servers_file : string lazy_t (* servers.txt file *) +val port : int lazy_t (* port on which getter listens *) +val dtd_base_url : string lazy_t (* base URL for DTD downloading *) + + (* {2 derived data} *) + +val host : string lazy_t (* host on which getter listens *) +val my_own_url : string lazy_t (* URL at which contact getter *) +val servers : (int * string) list lazy_t + (* (position * server) list *) +val cache_mode : encoding lazy_t (* cached files encoding *) + + (* {2 dynamic configuration changes} *) + + (* add a server to servers list in a given position (defaults to "after the + last server", change servers file accordingly and reload servers list *) +val add_server: ?position:int -> string -> unit + (* remove a server from servers list, change servers file accordingly and + reload servers list *) +val remove_server: int -> unit + + (* {2 misc} *) + +val reload: unit -> unit (* reload configuration information *) +val env_to_string : unit -> string (* dump a textual representation of the + current http_getter settings on an output + channel *) + diff --git a/helm/ocaml/getter/http_getter_map.ml b/helm/ocaml/getter/http_getter_map.ml new file mode 100644 index 000000000..57ec92736 --- /dev/null +++ b/helm/ocaml/getter/http_getter_map.ml @@ -0,0 +1,96 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +exception Key_already_in of string;; +exception Key_not_found of string;; + +class map dbname = + let perm = 420 in (* permission 644 in decimal notation *) + let open_dbm () = Dbm.opendbm dbname [ Dbm.Dbm_rdwr; Dbm.Dbm_create ] perm in + + object (self) + + inherit ThreadSafe.threadSafe + + val mutable db = open_dbm () + + (*initializer Gc.finalise Dbm.close db (* close db on GC *)*) + + method add key value = + self#doWriter (lazy ( + try + Dbm.add db key value + with Dbm.Dbm_error "Entry already exists" -> raise (Key_already_in key) + )) + + method replace key value = + self#doWriter (lazy ( + Dbm.replace db key value + )) + + method remove key = + self#doWriter (lazy ( + try + Dbm.remove db key + with Dbm.Dbm_error "dbm_delete" -> raise (Key_not_found key) + )) + + method resolve key = + self#doReader (lazy ( + try Dbm.find db key with Not_found -> raise (Key_not_found key) + )) + + method iter (f: string -> string -> unit) = + self#doReader (lazy ( + Dbm.iter f db + )) + + method sync = + self#doWriter (lazy ( + Dbm.close db; + db <- open_dbm () + )) + + method clear = + self#doWriter (lazy ( + Dbm.close db; + List.iter + (fun ext -> + let file = dbname ^ ext in + if Sys.file_exists file then Sys.remove file) + [".dir"; ".pag"; ".db"]; + db <- open_dbm () + )) + + method close = + self#doWriter (lazy ( + Dbm.close db + )) + + end + diff --git a/helm/ocaml/getter/http_getter_map.mli b/helm/ocaml/getter/http_getter_map.mli new file mode 100644 index 000000000..7081f1962 --- /dev/null +++ b/helm/ocaml/getter/http_getter_map.mli @@ -0,0 +1,44 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +exception Key_already_in of string +exception Key_not_found of string + +class map: + string -> + object + method add: string -> string -> unit + method replace: string -> string -> unit + method remove: string -> unit + method resolve: string -> string + method iter: (string -> string -> unit) -> unit + method sync: unit + method clear: unit + + method close: unit (* use with caution! *) + end diff --git a/helm/ocaml/getter/http_getter_misc.ml b/helm/ocaml/getter/http_getter_misc.ml new file mode 100644 index 000000000..c983c2988 --- /dev/null +++ b/helm/ocaml/getter/http_getter_misc.ml @@ -0,0 +1,250 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Printf + +open Http_getter_debugger + +let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *) +let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$" +let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://" +let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://" +let dir_sep_RE = Pcre.regexp "/" +let heading_slash_RE = Pcre.regexp "^/" + +let bufsiz = 16384 (* for file system I/O *) +let tcp_bufsiz = 4096 (* for TCP I/O *) + +let fold_file f init fname = + let ic = open_in fname in + let rec aux acc = + let line = try Some (input_line ic) with End_of_file -> None in + match line with + | None -> acc + | Some line -> aux (f line acc) + in + let res = try aux init with e -> close_in ic; raise e in + close_in ic; + res + +let iter_file f = fold_file (fun line _ -> f line) () + +let hashtbl_sorted_fold f tbl init = + let sorted_keys = + List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) + in + List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys + +let hashtbl_sorted_iter f tbl = + let sorted_keys = + List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) + in + List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys + +let cp src dst = + let (ic, oc) = (open_in src, open_out dst) in + let buf = String.create bufsiz in + (try + while true do + let bytes = input ic buf 0 bufsiz in + if bytes = 0 then raise End_of_file else output oc buf 0 bytes + done + with End_of_file -> ()); + close_in ic; close_out oc + +let parse_url url = + try + let subs = + Pcre.extract ~rex:url_RE (Pcre.replace ~rex:http_scheme_RE url) + in + (subs.(1), + (if subs.(2) = "" then 80 else int_of_string subs.(3)), + (if subs.(4) = "" then "/" else subs.(4))) + with exc -> + failwith + (sprintf "Can't parse url: %s (exception: %s)" + url (Printexc.to_string exc)) +let init_socket addr port = + let inet_addr = (Unix.gethostbyname addr).Unix.h_addr_list.(0) in + let sockaddr = Unix.ADDR_INET (inet_addr, port) in + let suck = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in + Unix.connect suck sockaddr; + let outchan = Unix.out_channel_of_descr suck in + let inchan = Unix.in_channel_of_descr suck in + (inchan, outchan) +let http_get_iter_buf ~callback url = + let (address, port, path) = parse_url url in + let buf = String.create tcp_bufsiz in + let (inchan, outchan) = init_socket address port in + output_string outchan (sprintf "GET %s\r\n" path); + flush outchan; + (try + while true do + match input inchan buf 0 tcp_bufsiz with + | 0 -> raise End_of_file + | bytes when bytes = tcp_bufsiz -> (* buffer full, no need to slice it *) + callback buf + | bytes when bytes < tcp_bufsiz -> (* buffer not full, slice it *) + callback (String.sub buf 0 bytes) + | _ -> (* ( bytes < 0 ) || ( bytes > tcp_bufsiz ) *) + assert false + done + with End_of_file -> ()); + close_in inchan (* close also outchan, same fd *) + +let wget ?output url = + debug_print + (sprintf "wgetting %s (output: %s)" url + (match output with None -> "default" | Some f -> f)); + match url with + | url when Pcre.pmatch ~rex:file_scheme_RE url -> (* file:// *) + (let src_fname = Pcre.replace ~rex:file_scheme_RE url in + match output with + | Some dst_fname -> cp src_fname dst_fname + | None -> + let dst_fname = Filename.basename src_fname in + if src_fname <> dst_fname then + cp src_fname dst_fname + else (* src and dst are the same: do nothing *) + ()) + | url when Pcre.pmatch ~rex:http_scheme_RE url -> (* http:// *) + (let oc = + open_out (match output with Some f -> f | None -> Filename.basename url) + in + http_get_iter_buf ~callback:(fun data -> output_string oc data) url; + close_out oc) + | scheme -> (* unsupported scheme *) + failwith ("Http_getter_misc.wget: unsupported scheme: " ^ scheme) + +let gzip ?(keep = false) ?output fname = + let output = match output with None -> fname ^ ".gz" | Some fname -> fname in + debug_print (sprintf "gzipping %s (keep: %b, output: %s)" fname keep output); + let (ic, oc) = (open_in fname, Gzip.open_out output) in + let buf = String.create bufsiz in + (try + while true do + let bytes = input ic buf 0 bufsiz in + if bytes = 0 then raise End_of_file else Gzip.output oc buf 0 bytes + done + with End_of_file -> ()); + close_in ic; Gzip.close_out oc; + if not keep then Sys.remove fname +;; + +let gunzip ?(keep = false) ?output fname = + (* assumption: given file name ends with ".gz" or output is set *) + let output = + match output with + | None -> + if (Pcre.pmatch ~rex:trailing_dot_gz_RE fname) then + Pcre.replace ~rex:trailing_dot_gz_RE fname + else + failwith + "Http_getter_misc.gunzip: unable to determine output file name" + | Some fname -> fname + in + debug_print (sprintf "gunzipping %s (keep: %b, output: %s)" + fname keep output); + let (ic, oc) = (Gzip.open_in fname, open_out output) in + let buf = String.create bufsiz in + (try + while true do + let bytes = Gzip.input ic buf 0 bufsiz in + if bytes = 0 then raise End_of_file else Pervasives.output oc buf 0 bytes + done + with End_of_file -> ()); + Gzip.close_in ic; close_out oc; + if not keep then Sys.remove fname +;; + +let tempfile () = Filename.temp_file "http_getter_" "" + +exception Mkdir_failure of string * string;; (* dirname, failure reason *) +let dir_perm = 0o755 + +let mkdir ?(parents = false) dirname = + let mkdirhier () = + let (pieces, hd) = + let split = Pcre.split ~rex:dir_sep_RE dirname in + if Pcre.pmatch ~rex:heading_slash_RE dirname then + (List.tl split, "/") + else + (split, "") + in + ignore + (List.fold_left + (fun pre dir -> + let next_dir = + sprintf "%s%s%s" pre (match pre with "/" | "" -> "" | _ -> "/") dir + in + (try + (match (Unix.stat next_dir).Unix.st_kind with + | Unix.S_DIR -> () (* dir component already exists, go on! *) + | _ -> (* dir component already exists but isn't a dir, abort! *) + raise + (Mkdir_failure (dirname, + sprintf "'%s' already exists but is not a dir" next_dir))) + with Unix.Unix_error (Unix.ENOENT, "stat", _) -> + (* dir component doesn't exists, create it and go on! *) + Unix.mkdir next_dir dir_perm); + next_dir) + hd pieces) + in + if parents then mkdirhier () else Unix.mkdir dirname dir_perm + +let string_of_proc_status = function + | Unix.WEXITED code -> sprintf "[Exited: %d]" code + | Unix.WSIGNALED sg -> sprintf "[Killed: %d]" sg + | Unix.WSTOPPED sg -> sprintf "[Stopped: %d]" sg + +let http_get url = + if Pcre.pmatch ~rex:file_scheme_RE url then begin + (* file:// URL. Read data from file system *) + let fname = Pcre.replace ~rex:file_scheme_RE url in + try + let size = (Unix.stat fname).Unix.st_size in + let buf = String.create size in + let ic = open_in fname in + really_input ic buf 0 size; + close_in ic; + Some buf + with Unix.Unix_error (Unix.ENOENT, "stat", _) -> None + end else (* other URL, pass it to Http_client *) + try + Some (Http_client.http_get url) + with e -> + prerr_endline (sprintf + "Warning: Http_client failed on url %s with exception: %s" + url (Printexc.to_string e)); + None + +let is_blank_line = + let blank_line_RE = Pcre.regexp "(^#)|(^\\s*$)" in + fun line -> + Pcre.pmatch ~rex:blank_line_RE line + diff --git a/helm/ocaml/getter/http_getter_misc.mli b/helm/ocaml/getter/http_getter_misc.mli new file mode 100644 index 000000000..b328742be --- /dev/null +++ b/helm/ocaml/getter/http_getter_misc.mli @@ -0,0 +1,80 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + + (** 'mkdir' failed, arguments are: name of the directory to be created and + failure reason *) +exception Mkdir_failure of string * string + + (** "fold_left" like function on file lines, trailing newline is not passed to + the given function *) +val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a + (* "iter" like function on file lines, trailing newline is not passed to the + given function *) +val iter_file : (string -> unit) -> string -> unit + + (** like Hashtbl.fold but keys are processed ordered *) +val hashtbl_sorted_fold : + ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c + (** like Hashtbl.iter but keys are processed ordered *) +val hashtbl_sorted_iter : ('a -> 'b -> unit) -> ('a, 'b) Hashtbl.t -> unit + + (** cp frontend *) +val cp: string -> string -> unit + (** wget frontend, if output is given it is the destination file, otherwise + standard wget rules are used. Additionally this function support also the + "file://" scheme for file system addressing *) +val wget: ?output: string -> string -> unit + (** gzip frontend. If keep = true original file will be kept, default is + false. output is the file on which gzipped data will be saved, default is + given file with an added ".gz" suffix *) +val gzip: ?keep: bool -> ?output: string -> string -> unit + (** gunzip frontend. If keep = true original file will be kept, default is + false. output is the file on which gunzipped data will be saved, default is + given file name without trailing ".gz" *) +val gunzip: ?keep: bool -> ?output: string -> string -> unit + (** tempfile frontend, return the name of created file. A special purpose + suffix is used (actually "_http_getter" *) +val tempfile: unit -> string + (** mkdir frontend, if parents = true also parent directories will be created. + If the given directory already exists doesn't act *) +val mkdir: ?parents: bool -> string -> unit + + (** pretty printer for Unix.process_status values *) +val string_of_proc_status : Unix.process_status -> string + + (** raw HTTP downloader, return Some the contents of downloaded resource or + None if an error occured while downloading. This function support also + "file://" scheme for filesystem resources *) +val http_get: string -> string option + (** 'iter' like method that iter over string slices (unspecified length) of a + remote resources fetched via HTTP GET requests *) +val http_get_iter_buf: callback:(string -> unit) -> string -> unit + + (** true on blanks-only and #-commented lines, false otherwise *) +val is_blank_line: string -> bool + diff --git a/helm/ocaml/getter/http_getter_types.ml b/helm/ocaml/getter/http_getter_types.ml new file mode 100644 index 000000000..bf584f6ce --- /dev/null +++ b/helm/ocaml/getter/http_getter_types.ml @@ -0,0 +1,62 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +exception Bad_request of string +exception Unresolvable_URI of string +exception Invalid_URI of string +exception Invalid_URL of string +exception Invalid_RDF_class of string +exception Internal_error of string + +type encoding = Enc_normal | Enc_gzipped +type answer_format = Fmt_text | Fmt_xml +type ls_flag = Yes | No | Ann +type ls_object = + { + uri: string; + ann: bool; + types: ls_flag; + body: ls_flag; + proof_tree: ls_flag; + } +type ls_item = + | Ls_section of string + | Ls_object of ls_object + +type xml_uri = + | Cic of string + | Theory of string +type rdf_uri = string * xml_uri +type nuprl_uri = string +type uri = + | Cic_uri of xml_uri + | Nuprl_uri of nuprl_uri + | Rdf_uri of rdf_uri + +module StringSet = Set.Make (String) + -- 2.39.2