-configuration.cmo: configuration.cmi
-configuration.cmx: configuration.cmi
+http_getter_env.cmi: http_getter_types.cmo
+http_getter_common.cmi: http_getter_types.cmo
+http_getter_cache.cmi: http_getter_types.cmo
+http_getter.cmi: http_getter_types.cmo
clientHTTP.cmo: clientHTTP.cmi
clientHTTP.cmx: clientHTTP.cmi
-getter.cmo: clientHTTP.cmi configuration.cmi getter.cmi
-getter.cmx: clientHTTP.cmx configuration.cmx getter.cmi
+http_getter_debugger.cmo: http_getter_debugger.cmi
+http_getter_debugger.cmx: http_getter_debugger.cmi
+http_getter_misc.cmo: http_getter_debugger.cmi http_getter_misc.cmi
+http_getter_misc.cmx: http_getter_debugger.cmx http_getter_misc.cmi
+http_getter_const.cmo: http_getter_const.cmi
+http_getter_const.cmx: http_getter_const.cmi
+http_getter_env.cmo: http_getter_const.cmi http_getter_misc.cmi \
+ http_getter_types.cmo http_getter_env.cmi
+http_getter_env.cmx: http_getter_const.cmx http_getter_misc.cmx \
+ http_getter_types.cmx http_getter_env.cmi
+http_getter_common.cmo: http_getter_env.cmi http_getter_misc.cmi \
+ http_getter_types.cmo http_getter_common.cmi
+http_getter_common.cmx: http_getter_env.cmx http_getter_misc.cmx \
+ http_getter_types.cmx http_getter_common.cmi
+http_getter_map.cmo: http_getter_map.cmi
+http_getter_map.cmx: http_getter_map.cmi
+http_getter_cache.cmo: http_getter_common.cmi http_getter_debugger.cmi \
+ http_getter_env.cmi http_getter_misc.cmi http_getter_types.cmo \
+ http_getter_cache.cmi
+http_getter_cache.cmx: http_getter_common.cmx http_getter_debugger.cmx \
+ http_getter_env.cmx http_getter_misc.cmx http_getter_types.cmx \
+ http_getter_cache.cmi
+http_getter.cmo: http_getter_cache.cmi http_getter_common.cmi \
+ http_getter_const.cmi http_getter_debugger.cmi http_getter_env.cmi \
+ http_getter_map.cmi http_getter_misc.cmi http_getter_types.cmo \
+ http_getter.cmi
+http_getter.cmx: http_getter_cache.cmx http_getter_common.cmx \
+ http_getter_const.cmx http_getter_debugger.cmx http_getter_env.cmx \
+ http_getter_map.cmx http_getter_misc.cmx http_getter_types.cmx \
+ http_getter.cmi
+
PACKAGE = getter
-REQUIRES = pxp helm-urimanager http
-PREDICATES =
-INTERFACE_FILES = configuration.mli clientHTTP.mli getter.mli
-IMPLEMENTATION_FILES = $(INTERFACE_FILES:%.mli=%.ml)
-EXTRA_OBJECTS_TO_INSTALL =
-EXTRA_OBJECTS_TO_CLEAN =
+REQUIRES = \
+ http dbm pcre pxp shell zip \
+ helm-thread helm-logger helm-urimanager helm-registry
+
+INTERFACE_FILES = \
+ clientHTTP.mli \
+ http_getter_debugger.mli \
+ http_getter_misc.mli \
+ http_getter_const.mli \
+ http_getter_env.mli \
+ http_getter_common.mli \
+ http_getter_map.mli \
+ http_getter_cache.mli \
+ http_getter.mli
+
+IMPLEMENTATION_FILES = \
+ http_getter_types.ml \
+ $(INTERFACE_FILES:%.mli=%.ml)
include ../Makefile.common
+
+test: getter.cma test.ml
+ $(OCAMLC) -linkpkg -o $@ $^
+
+++ /dev/null
-(******************************************************************************)
-(* *)
-(* PROJECT HELM *)
-(* *)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 28/12/2000 *)
-(* *)
-(* This is the parser that reads the configuration file of helm *)
-(* *)
-(******************************************************************************)
-
-exception MalformedDir of string
-
-(* this should be the only hard coded constant *)
-let filename =
- let prefix =
- try
- Sys.getenv "HELM_CONFIGURATION_DIR"
- with
- Not_found -> "/projects/helm/V7/phd/local/etc/helm"
- in
- if prefix.[(String.length prefix) - 1] = '/' then
- raise (MalformedDir prefix) ;
- prefix ^ "/configuration.xml";;
-
-exception Warnings;;
-
-class warner =
- object
- method warn w =
- print_endline ("WARNING: " ^ w) ;
- (raise Warnings : unit)
- end
-;;
-
-let xml_document () =
- let error e =
- prerr_endline ("Warning: configuration file not found, or incorrect: " ^
- Pxp_types.string_of_exn e) ;
- None
- in
- let module Y = Pxp_yacc in
- try
- let config = {Y.default_config with Y.warner = new warner} in
- Some (Y.parse_document_entity config (Y.from_file filename) Y.default_spec)
- with
- | (Pxp_types.Error _) as e -> error e
- | (Pxp_types.At _) as e -> error e
- | (Pxp_types.Validation_error _) as e -> error e
- | (Pxp_types.WF_error _) as e -> error e
- | (Pxp_types.Namespace_error _) as e -> error e
- | (Pxp_types.Character_not_supported) as e -> error e
-;;
-
-exception Impossible;;
-
-let vars = Hashtbl.create 14;;
-
-(* resolve <value-of> tags and returns the string values of the variable tags *)
-let rec resolve =
- let module D = Pxp_document in
- function
- [] -> ""
- | he::tl when he#node_type = D.T_element "value-of" ->
- (match he#attribute "var" with
- Pxp_types.Value var -> Hashtbl.find vars var
- | _ -> raise Impossible
- ) ^ resolve tl
- | he::tl when he#node_type = D.T_data ->
- he#data ^ resolve tl
- | _ -> raise Impossible
-;;
-
-(* we trust the xml file to be valid because of the validating xml parser *)
-let _ =
- match xml_document () with
- None -> ()
- | Some d ->
- List.iter
- (function
- n ->
- match n#node_type with
- Pxp_document.T_element var ->
- Hashtbl.add vars var (resolve (n#sub_nodes))
- | _ -> raise Impossible
- )
- (d#root#sub_nodes)
-;;
-
-(* try to read a configuration variable, given its name into the
- * configuration.xml file and its name into the shell environment.
- * The shell variable, if present, has precedence over configuration.xml
- *)
-let read_configuration_var_env xml_name env_name =
- try
- try
- Sys.getenv env_name
- with
- Not_found -> Hashtbl.find vars xml_name
- with
- Not_found ->
- Printf.printf "Sorry, cannot find variable `%s', please check your configuration\n" xml_name ;
- flush stdout ;
- raise Not_found
-
-let read_configuration_var xml_name =
- try
- Hashtbl.find vars xml_name
- with
- Not_found ->
- Printf.printf "Sorry, cannot find variable `%s', please check your configuration\n" xml_name ;
- flush stdout ;
- raise Not_found
-
-(* Zack: no longer used *)
-(* let tmp_dir = read_configuration_var_env "tmp_dir" "HELM_TMP_DIR";; *)
-let getter_url = read_configuration_var_env "getter_url" "HELM_GETTER_URL";;
-let processor_url = read_configuration_var_env "processor_url" "HELM_PROCESSOR_URL";;
-let annotations_dir = read_configuration_var_env "annotations_dir" "HELM_ANNOTATIONS_DIR"
-let annotations_url = read_configuration_var_env "annotations_url" "HELM_ANNOTATIONS_URL"
-
-let _ = Hashtbl.clear vars;;
+++ /dev/null
-(* Copyright (C) 2000, HELM Team.
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://cs.unibo.it/helm/.
- *)
-
-(******************************************************************************)
-(* *)
-(* PROJECT HELM *)
-(* *)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 29/11/2000 *)
-(* *)
-(* *)
-(******************************************************************************)
-
-(* Zack: no longer needed *)
-(* val tmp_dir : string *)
-val getter_url : string
-val processor_url : string
-val annotations_dir : string
-val annotations_url : string
+++ /dev/null
-(* Copyright (C) 2000, HELM Team.
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://cs.unibo.it/helm/.
- *)
-
-(******************************************************************************)
-(* *)
-(* PROJECT HELM *)
-(* *)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 24/01/2000 *)
-(* *)
-(******************************************************************************)
-
-let getter_url = ref Configuration.getter_url;;
-
-let update () =
- (* deliver update request to http_getter *)
- ClientHTTP.send (!getter_url ^ "update")
-;;
-
-type format =
- Normal
- | GZipped
-;;
-
-let getxml ?(format=Normal) ?(patchdtd=true) uri =
- (* deliver getxml request to http_getter *)
- ClientHTTP.get_and_save_to_tmp
- (!getter_url ^ "getxml" ^
- "?uri=" ^ UriManager.string_of_uri uri ^
- "&format=" ^ (match format with Normal -> "normal" | GZipped -> "gzipped") ^
- "&patch_dtd=" ^ (match patchdtd with true -> "yes" | false -> "no")
- )
-;;
-
-let register uri url =
- (* deliver register request to http_getter *)
- ClientHTTP.send
- (!getter_url ^ "register" ^
- "?uri=" ^ (UriManager.string_of_uri uri) ^
- "&url=" ^ url)
-;;
-
-exception Unresolved;;
-exception UnexpectedGetterOutput;;
-
-(* resolve_result is needed because it is not possible to raise *)
-(* an exception in a pxp ever-processing callback. Too bad. *)
-type resolve_result =
- Unknown
- | Exception of exn
- | Resolved of string
-
-let resolve uri =
- (* deliver resolve request to http_getter *)
- let doc =
- ClientHTTP.get
- (!getter_url ^ "resolve" ^ "?uri=" ^ (UriManager.string_of_uri uri))
- in
- let res = ref Unknown in
- Pxp_yacc.process_entity Pxp_yacc.default_config (`Entry_content [])
- (Pxp_yacc.create_entity_manager ~is_document:true Pxp_yacc.default_config
- (Pxp_yacc.from_string doc))
- (function
- Pxp_yacc.E_start_tag ("url",["value",url],_) -> res := Resolved url
- | Pxp_yacc.E_start_tag ("unresolved",[],_) -> res := Exception Unresolved
- | Pxp_yacc.E_start_tag _ -> res := Exception UnexpectedGetterOutput
- | _ -> ()
- ) ;
- match !res with
- Unknown -> raise UnexpectedGetterOutput
- | Exception e -> raise e
- | Resolved url -> url
-;;
+++ /dev/null
-(* Copyright (C) 2000, HELM Team.
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://cs.unibo.it/helm/.
- *)
-
-(******************************************************************************)
-(* *)
-(* PROJECT HELM *)
-(* *)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 24/01/2000 *)
-(* *)
-(* *)
-(******************************************************************************)
-
-(* THE URL OF THE HTTP GETTER *)
-val getter_url : string ref
-
-(* SIMPLE BINDINGS TO THE HTTP GETTER *)
-(* synchronize with the servers *)
-val update : unit -> unit
-
-type format =
- Normal
- | GZipped
-;;
-
-(* get the xml file *)
-(* defaults: format = Normal and patchdtd = true *)
-val getxml : ?format:format -> ?patchdtd:bool -> UriManager.uri -> string
-
-(* adds an (URI -> URL) entry in the map from URIs to URLs *)
-val register : UriManager.uri -> string -> unit
-
-exception Unresolved
-exception UnexpectedGetterOutput
-
-(* resolves an URI to its corresponding URL. *)
-(* Unresolved is raised if there is no URL for the given URI. *)
-(* UnexceptedGetterOutput is raised if the output of the real *)
-(* getter has not the expected format. *)
-val resolve: UriManager.uri -> string
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Printf
+
+open Http_getter_common
+open Http_getter_misc
+open Http_getter_debugger
+open Http_getter_types
+
+exception Not_implemented of string
+exception UnexpectedGetterOutput
+
+(* resolve_result is needed because it is not possible to raise *)
+(* an exception in a pxp ever-processing callback. Too bad. *)
+type resolve_result =
+ | Unknown
+ | Exception of exn
+ | Resolved of string
+
+let not_implemented s = raise (Not_implemented ("Http_getter." ^ s))
+
+let (index_line_sep_RE, index_sep_RE, trailing_types_RE,
+ heading_cic_RE, heading_theory_RE, heading_nuprl_RE,
+ heading_rdf_cic_RE, heading_rdf_theory_RE) =
+ (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n",
+ Pcre.regexp "\\.types$",
+ Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:",
+ Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:")
+
+ (* global maps, shared by all threads *)
+
+let cic_map =
+ lazy (new Http_getter_map.map (Lazy.force Http_getter_env.cic_dbm))
+let nuprl_map =
+ lazy (new Http_getter_map.map (Lazy.force Http_getter_env.nuprl_dbm))
+let rdf_map =
+ lazy (new Http_getter_map.map (Lazy.force Http_getter_env.rdf_dbm))
+let xsl_map =
+ lazy (new Http_getter_map.map (Lazy.force Http_getter_env.xsl_dbm))
+
+let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ]
+let close_maps () = List.iter (fun m -> (Lazy.force m) # close) maps
+let clear_maps () = List.iter (fun m -> (Lazy.force m) # clear) maps
+let sync_maps () = List.iter (fun m -> (Lazy.force m) # sync) maps
+
+let map_of_uri = function
+ | uri when is_cic_uri uri -> Lazy.force cic_map
+ | uri when is_nuprl_uri uri -> Lazy.force nuprl_map
+ | uri when is_rdf_uri uri -> Lazy.force rdf_map
+ | uri when is_xsl_uri uri -> Lazy.force xsl_map
+ | uri -> raise (Unresolvable_URI uri)
+
+let update_from_server logmsg server_url = (* use global maps *)
+ debug_print ("Updating information from " ^ server_url);
+ let xml_url_of_uri = function
+ (* TODO missing sanity checks on server_url, e.g. it can contains $1 *)
+ | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) ->
+ Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) ->
+ Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) ->
+ Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri
+ | uri -> raise (Invalid_URI uri)
+ in
+ let rdf_url_of_uri = function (* TODO as above *)
+ | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) ->
+ Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) ->
+ Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri
+ | uri -> raise (Invalid_URI uri)
+ in
+ let log = ref (`T ("Processing server: " ^ server_url) :: logmsg) in
+ let (xml_index, rdf_index, xsl_index) =
+ (* TODO keeps index in memory, is better to keep them on temp files? *)
+ (http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xml_index)),
+ http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.rdf_index)),
+ http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xsl_index)))
+ in
+ if (xml_index = None && rdf_index = None && xsl_index = None) then
+ debug_print (sprintf "Warning: useless server %s" server_url);
+ (match xml_index with
+ | Some xml_index ->
+ (log := `T "Updating XML db ...<br />" :: !log;
+ List.iter
+ (function
+ | l when is_blank_line l -> () (* skip blank and commented lines *)
+ | l ->
+ (try
+ (match Pcre.split ~rex:index_line_sep_RE l with
+ | [uri; "gz"] ->
+ assert (is_cic_uri uri || is_nuprl_uri uri) ;
+ (map_of_uri uri)#replace
+ uri ((xml_url_of_uri uri) ^ ".xml.gz")
+ | [uri] ->
+ assert (is_cic_uri uri || is_nuprl_uri uri) ;
+ (map_of_uri uri)#replace
+ uri ((xml_url_of_uri uri) ^ ".xml")
+ | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log)
+ with Invalid_URI uri ->
+ log := `T ("Ignoring invalid XML URI: '" ^ l) :: !log))
+ (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *)
+ log := `T "All done" :: !log)
+ | None -> ());
+ (match rdf_index with
+ | Some rdf_index ->
+ (log := `T "Updating RDF db ..." :: !log;
+ List.iter
+ (fun l ->
+ try
+ (match Pcre.split ~rex:index_line_sep_RE l with
+ | [uri; "gz"] ->
+ (Lazy.force rdf_map) # replace uri
+ ((rdf_url_of_uri uri) ^ ".xml.gz")
+ | [uri] ->
+ (Lazy.force rdf_map) # replace uri
+ ((rdf_url_of_uri uri) ^ ".xml")
+ | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log)
+ with Invalid_URI uri ->
+ log := `T ("Ignoring invalid RDF URI: '" ^ l) :: !log)
+ (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *)
+ log := `T "All done" :: !log)
+ | None -> ());
+ (match xsl_index with
+ | Some xsl_index ->
+ (log := `T "Updating XSLT db ..." :: !log;
+ List.iter
+ (fun l -> (Lazy.force xsl_map) # replace l (server_url ^ "/" ^ l))
+ (Pcre.split ~rex:index_sep_RE xsl_index);
+ log := `T "All done" :: !log)
+ | None -> ());
+ debug_print "done with this server";
+ !log
+
+let update_from_all_servers () = (* use global maps *)
+ clear_maps ();
+ let log =
+ List.fold_left
+ update_from_server
+ [] (* initial logmsg: empty *)
+ (* reverse order: 1st server is the most important one *)
+ (List.map snd (List.rev (Lazy.force Http_getter_env.servers)))
+ in
+ sync_maps ();
+ `Msg (`L (List.rev log))
+
+let update_from_one_server server_url =
+ let log = update_from_server [] server_url in
+ `Msg (`L (List.rev log))
+
+let temp_file_of_uri uri =
+ let flat_string s s' c =
+ let cs = String.copy s in
+ for i = 0 to (String.length s) - 1 do
+ if String.contains s' s.[i] then cs.[i] <- c
+ done;
+ cs
+ in
+ let user = try Unix.getlogin () with _ -> "" in
+ Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') ""
+
+ (* should we use a remote getter or not *)
+let remote () =
+ try
+ Helm_registry.get "getter.mode" = "remote"
+ with Helm_registry.Key_not_found _ -> false
+let getter_url () = Helm_registry.get "getter.url"
+
+(* Remote interface: getter methods implemented using a remote getter *)
+
+ (* <TODO> *)
+let getxslt_remote ~patch_dtd uri = not_implemented "getxslt_remote"
+let getdtd_remote ~patch_dtd uri = not_implemented "getdtd_remote"
+let clean_cache_remote () = not_implemented "clean_cache_remote"
+let list_servers_remote () = not_implemented "list_servers_remote"
+let add_server_remote ~position name = not_implemented "add_server_remote"
+let remove_server_remote position = not_implemented "remove_server_remote"
+let getalluris_remote () = not_implemented "getalluris_remote"
+let getallrdfuris_remote () = not_implemented "getallrdfuris_remote"
+let ls_remote lsuri = not_implemented "ls_remote"
+ (* </TODO> *)
+
+let resolve_remote uri =
+ (* deliver resolve request to http_getter *)
+ let doc = ClientHTTP.get (sprintf "%sresolve?uri=%s" (getter_url ()) uri) in
+ let res = ref Unknown in
+ Pxp_yacc.process_entity Pxp_yacc.default_config (`Entry_content [])
+ (Pxp_yacc.create_entity_manager ~is_document:true Pxp_yacc.default_config
+ (Pxp_yacc.from_string doc))
+ (function
+ | Pxp_yacc.E_start_tag ("url",["value",url],_) -> res := Resolved url
+ | Pxp_yacc.E_start_tag ("unresolved",[],_) ->
+ res := Exception (Unresolvable_URI uri)
+ | Pxp_yacc.E_start_tag _ -> res := Exception UnexpectedGetterOutput
+ | _ -> ());
+ match !res with
+ | Unknown -> raise UnexpectedGetterOutput
+ | Exception e -> raise e
+ | Resolved url -> url
+
+let register_remote ~uri ~url =
+ ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url)
+
+let register_remote ~uri ~url =
+ ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url)
+
+let update_remote () =
+ let answer = ClientHTTP.get (getter_url () ^ "update") in
+ `Msg (`T answer)
+
+let getxml_remote ~format ~patch_dtd uri =
+ ClientHTTP.get_and_save_to_tmp
+ (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s"
+ (getter_url ()) uri
+ (match format with Enc_normal -> "normal" | Enc_gzipped -> "gzipped")
+ (match patch_dtd with true -> "yes" | false -> "no"))
+
+(* API *)
+
+let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ())
+
+let resolve uri =
+ if remote () then
+ resolve_remote uri
+ else
+ try
+ (map_of_uri uri)#resolve uri
+ with Http_getter_map.Key_not_found _ -> raise (Unresolvable_URI uri)
+
+ (* Warning: this fail if uri is already registered *)
+let register ~uri ~url =
+ if remote () then
+ register_remote ~uri ~url
+ else
+ (map_of_uri uri)#add uri url
+
+let update () =
+ if remote () then
+ update_remote ()
+ else
+ update_from_all_servers ()
+
+let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri =
+ if remote () then
+ getxml_remote ~format ~patch_dtd uri
+ else begin
+ let url = resolve uri in
+ let (fname, outchan) = temp_file_of_uri uri in
+ Http_getter_cache.respond_xml ~uri ~url ~enc:format ~patch:patch_dtd
+ outchan;
+ close_out outchan;
+ fname
+ end
+
+let getxslt ?(patch_dtd = true) uri =
+ if remote () then
+ getxslt_remote ~patch_dtd uri
+ else begin
+ let url = resolve uri in
+ let (fname, outchan) = temp_file_of_uri uri in
+ Http_getter_cache.respond_xsl ~url ~patch:patch_dtd outchan;
+ close_out outchan;
+ fname
+ end
+
+let getdtd ?(patch_dtd = true) uri =
+ if remote () then
+ getdtd_remote ~patch_dtd uri
+ else begin
+ let url = Lazy.force Http_getter_env.dtd_dir ^ "/" ^ uri in
+ let (fname, outchan) = temp_file_of_uri uri in
+ Http_getter_cache.respond_dtd ~url ~patch:patch_dtd outchan;
+ close_out outchan;
+ fname
+ end
+
+let clean_cache () =
+ if remote () then
+ clean_cache_remote ()
+ else
+ Http_getter_cache.clean ()
+
+let list_servers () =
+ if remote () then
+ list_servers_remote ()
+ else
+ Lazy.force Http_getter_env.servers
+
+let add_server ?(position = 0) name =
+ if remote () then
+ add_server_remote ~position name
+ else begin
+ if position = 0 then begin
+ Http_getter_env.add_server ~position:0 name;
+ update_from_one_server name (* quick update (new server only) *)
+ end else if position > 0 then begin
+ Http_getter_env.add_server ~position name;
+ update ()
+ end else (* already checked bt parse_position *)
+ assert false
+ end
+
+let remove_server position =
+ if remote () then
+ remove_server_remote ()
+ else begin
+ let server_name =
+ try
+ List.assoc position (Lazy.force Http_getter_env.servers)
+ with Not_found ->
+ raise (Invalid_argument (sprintf "no server with position %d" position))
+ in
+ Http_getter_env.remove_server position;
+ update ()
+ end
+
+let return_uris map filter =
+ let uris = ref [] in
+ map#iter (fun uri _ -> if filter uri then uris := uri :: !uris);
+ List.rev !uris
+
+let getalluris () =
+ if remote () then
+ getalluris_remote ()
+ else
+ let filter uri =
+ (Pcre.pmatch ~rex:heading_cic_RE uri) &&
+ not (Pcre.pmatch ~rex:trailing_types_RE uri)
+ in
+ return_uris (Lazy.force cic_map) filter
+
+let getallrdfuris classs =
+ if remote () then
+ getallrdfuris_remote ()
+ else
+ let filter =
+ let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in
+ match classs with
+ | `Forward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "forward") uri)
+ | `Backward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "backward") uri)
+ in
+ return_uris (Lazy.force rdf_map) filter
+
+let ls =
+ let (++) (oldann, oldtypes, oldbody, oldtree)
+ (newann, newtypes, newbody, newtree) =
+ ((if newann > oldann then newann else oldann),
+ (if newtypes > oldtypes then newtypes else oldtypes),
+ (if newbody > oldbody then newbody else oldbody),
+ (if newtree > oldtree then newtree else oldtree))
+ in
+ let basepart_RE =
+ Pcre.regexp
+ "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$"
+ in
+ let (types_RE, types_ann_RE, body_RE, body_ann_RE,
+ proof_tree_RE, proof_tree_ann_RE) =
+ (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$",
+ Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$",
+ Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$")
+ in
+ let (slash_RE, til_slash_RE, no_slashes_RE) =
+ (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$")
+ in
+ fun lsuri ->
+ if remote () then
+ ls_remote lsuri
+ else begin
+ let pat =
+ "^" ^
+ (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p))
+ in
+ let (dir_RE, obj_RE) =
+ (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)"))
+ in
+ let dirs = ref StringSet.empty in
+ let objs = Hashtbl.create 17 in
+ let store_dir d =
+ dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs
+ in
+ let store_obj o =
+ let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in
+ let no_flags = false, No, No, No in
+ let oldflags =
+ try
+ Hashtbl.find objs basepart
+ with Not_found -> (* no ann, no types, no body, no proof tree *)
+ no_flags
+ in
+ let newflags =
+ match o with
+ | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No)
+ | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No)
+ | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No)
+ | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No)
+ | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes)
+ | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann)
+ | s -> no_flags
+ in
+ Hashtbl.replace objs basepart (oldflags ++ newflags)
+ in
+ (Lazy.force cic_map) # iter
+ (* BLEARGH Dbm module lacks support for fold-like functions *)
+ (fun key _ ->
+ match key with
+ | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *)
+ let localpart = Pcre.replace ~rex:dir_RE uri in
+ if Pcre.pmatch ~rex:no_slashes_RE localpart then
+ store_obj localpart
+ else
+ store_dir localpart
+ | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *)
+ store_obj (Pcre.replace ~rex:til_slash_RE uri)
+ | uri -> () (* miss *));
+ let ls_items = ref [] in
+ StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs;
+ Http_getter_misc.hashtbl_sorted_iter
+ (fun uri (annflag, typesflag, bodyflag, treeflag) ->
+ ls_items :=
+ Ls_object {
+ uri = uri; ann = annflag;
+ types = typesflag; body = typesflag; proof_tree = treeflag
+ } :: !ls_items)
+ objs;
+ List.rev !ls_items
+ end
+
+ (* Shorthands from now on *)
+
+let getxml' uri = getxml (UriManager.string_of_uri uri)
+let resolve' uri = resolve (UriManager.string_of_uri uri)
+let register' uri url = register ~uri:(UriManager.string_of_uri uri) ~url
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Http_getter_types
+
+ (** {2 Getter Web Service interface as API *)
+
+val help: unit -> string
+val resolve: string -> string (* uri -> url *)
+val register: uri:string -> url:string -> unit
+val update: unit -> HelmLogger.html_msg
+val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string
+val getxslt : ?patch_dtd:bool -> string -> string
+val getdtd : ?patch_dtd:bool -> string -> string
+val clean_cache: unit -> unit
+val list_servers: unit -> (int * string) list
+val add_server: ?position:int -> string -> HelmLogger.html_msg
+val remove_server: int -> HelmLogger.html_msg
+val getalluris: unit -> string list
+val getallrdfuris: [ `Forward | `Backward ] -> string list
+val ls: xml_uri -> ls_item list
+
+ (** {2 Shorthands} *)
+
+val getxml' : UriManager.uri -> string
+val resolve' : UriManager.uri -> string
+val register' : UriManager.uri -> string -> unit
+
+ (** {2 Misc} *)
+
+val close_maps: unit -> unit
+val update_from_one_server: string -> HelmLogger.html_msg
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+(* TODO cache expires control!!! *)
+(* TODO uwobo loop:
+ if two large proof (already in cache) are requested at the same time by two
+ clients, uwobo (java implementation, not yet tested with the OCaml one)
+ starts looping sending output to one of the client *)
+
+open Http_getter_common;;
+open Http_getter_debugger;;
+open Http_getter_misc;;
+open Http_getter_types;;
+open Printf;;
+
+ (* expose ThreadSafe.threadSafe methods *)
+class threadSafe =
+ object
+ inherit ThreadSafe.threadSafe
+ method virtual doCritical : 'a. 'a lazy_t -> 'a
+ method virtual doReader : 'a. 'a lazy_t -> 'a
+ method virtual doWriter : 'a. 'a lazy_t -> 'a
+ end
+;;
+let threadSafe = new threadSafe ;;
+
+let resource_type_of_url = function
+ | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped
+ | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal
+ | url -> raise (Invalid_URL url)
+let extension_of_resource_type = function
+ | Enc_normal -> "xml"
+ | Enc_gzipped -> "xml.gz"
+
+ (* basename = resource name without trailing ".gz", if any *)
+let is_in_cache basename =
+ Sys.file_exists
+ (match Lazy.force Http_getter_env.cache_mode with
+ | Enc_normal -> basename
+ | Enc_gzipped -> basename ^ ".gz")
+
+let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan =
+ let resource_type = resource_type_of_url url in
+ let extension = extension_of_resource_type resource_type in
+ let downloadname =
+ match uri_of_string uri with (* parse uri *)
+ | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
+ (* assumption: baseuri starts with "/" *)
+ sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension
+ | Nuprl_uri baseuri ->
+ (* assumption: baseuri starts with "/" *)
+ sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri extension
+ | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
+ | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
+ let escaped_prefix =
+ (Pcre.replace ~pat:"/" ~templ:"_"
+ (Pcre.replace ~pat:"_" ~templ:"__"
+ (prefix ^
+ (match qbaseuri with
+ | Cic _ -> "//cic:"
+ | Theory _ -> "//theory:"))))
+ in
+ sprintf "%s/%s%s.%s"
+ (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension
+ in
+ let patch_fun =
+ if patch then Http_getter_common.patch_xml else (fun x -> x)
+ in
+ let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
+ let contype = "text/xml" in
+ (* Fill cache if needed and return a short circuit file.
+ Short circuit is needed in situations like:
+ resource type = normal, cache type = gzipped, required encoding = normal
+ we would like to avoid normal -> gzipped -> normal conversions. To avoid
+ this tmp_short_circuit is used to remember the name of the intermediate
+ file name *)
+ let fill_cache () =
+ threadSafe#doWriter (lazy(
+ if not (is_in_cache basename) then begin (* cache MISS *)
+ debug_print "Cache MISS :-(";
+ mkdir ~parents:true (Filename.dirname downloadname);
+ match (resource_type, Lazy.force Http_getter_env.cache_mode) with
+ | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
+ wget ~output:downloadname url;
+ None
+ | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *)
+ let tmp = tempfile () in
+ wget ~output:tmp url;
+ gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
+ if enc = Enc_normal then (* user wants normal: don't delete it! *)
+ Some (tmp, enc)
+ else begin
+ Sys.remove tmp;
+ None
+ end
+ | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *)
+ let tmp = tempfile () in
+ wget ~output:tmp url;
+ gunzip ~output:basename ~keep:true tmp; (* fill cache *)
+ if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *)
+ Some (tmp, enc)
+ else begin
+ Sys.remove tmp;
+ None
+ end
+ end else begin
+ debug_print "Cache HIT :-)";
+ None
+ end
+ )) in
+ let tmp_short_circuit = fill_cache () in
+ threadSafe#doReader (lazy(
+ assert (is_in_cache basename);
+ match (enc, Lazy.force Http_getter_env.cache_mode) with
+ | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
+ (* resource in cache is already in the required format *)
+ (match enc with
+ | Enc_normal ->
+ debug_print "No format mangling required (encoding = normal)";
+ return_file ~fname:basename ~contype ~patch_fun outchan
+ | Enc_gzipped ->
+ debug_print "No format mangling required (encoding = gzipped)";
+ return_file
+ ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
+ ~patch_fun ~gunzip:true
+ outchan)
+ | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal ->
+ (match tmp_short_circuit with
+ | None -> (* no short circuit possible, use cache *)
+ debug_print "No short circuit available, use cache";
+ let tmp = tempfile () in
+ (match enc with
+ | Enc_normal ->
+ (* required format is normal, cached version is gzipped *)
+ gunzip (* gunzip to tmp *)
+ ~output:tmp ~keep:true (basename ^ ".gz");
+ return_file ~fname:tmp ~contype ~patch_fun outchan;
+ | Enc_gzipped ->
+ (* required format is gzipped, cached version is normal *)
+ gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
+ return_file
+ ~fname:tmp ~contype ~contenc:"x-gzip"
+ ~patch_fun ~gunzip:true
+ outchan);
+ Sys.remove tmp
+ | Some (fname, Enc_normal) -> (* short circuit available, use it! *)
+ debug_print "Using short circuit (encoding = normal)";
+ return_file ~fname ~contype ~patch_fun outchan;
+ Sys.remove fname
+ | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *)
+ debug_print "Using short circuit (encoding = gzipped)";
+ return_file
+ ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan;
+ Sys.remove fname)
+ ))
+;;
+
+ (* TODO enc is not yet supported *)
+let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan =
+ let patch_fun =
+ if patch then Http_getter_common.patch_xsl else (fun x -> x)
+ in
+ let fname = tempfile () in
+ wget ~output:fname url;
+ return_file ~fname ~contype:"text/xml" ~patch_fun outchan;
+ Sys.remove fname
+;;
+
+ (* TODO enc is not yet supported *)
+let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan =
+ let patch_fun =
+ if patch then Http_getter_common.patch_dtd else (fun x -> x)
+ in
+ if Sys.file_exists url then
+ (* TODO check this: old getter here used text/xml *)
+ return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan
+ else
+ return_html_error ("Can't find DTD: " ^ url) outchan
+;;
+
+let clean () =
+ let module E = Http_getter_env in
+ List.iter
+ (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")))
+ [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ]
+;;
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Http_getter_types;;
+
+val respond_xml:
+ ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> out_channel ->
+ unit
+
+val respond_xsl:
+ ?enc:encoding -> ?patch:bool -> url:string -> out_channel ->
+ unit
+
+val respond_dtd:
+ ?enc:encoding -> ?patch:bool -> url:string -> out_channel ->
+ unit
+
+val clean: unit -> unit
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Http_getter_types;;
+open Printf;;
+
+let string_of_ls_flag = function No -> "NO" | Yes -> "YES" | Ann -> "ANN"
+let string_of_encoding = function
+ | Enc_normal -> "Normal"
+ | Enc_gzipped -> "GZipped"
+
+let is_cic_obj_uri uri = Pcre.pmatch ~pat:"^cic:" uri
+let is_theory_uri uri = Pcre.pmatch ~pat:"^theory:" uri
+let is_cic_uri uri = is_cic_obj_uri uri || is_theory_uri uri
+let is_nuprl_uri uri = Pcre.pmatch ~pat:"^nuprl:" uri
+let is_rdf_uri uri = Pcre.pmatch ~pat:"^helm:rdf(.*):(.*)//(.*)" uri
+let is_xsl_uri uri = Pcre.pmatch ~pat:"^\\w+\\.xsl" uri
+
+let rec uri_of_string = function
+ | uri when is_rdf_uri uri ->
+ (match Pcre.split ~pat:"//" uri with
+ | [ prefix; uri ] ->
+ let rest =
+ match uri_of_string uri with
+ | Cic_uri xmluri -> xmluri
+ | _ -> raise (Invalid_URI uri)
+ in
+ Rdf_uri (prefix, rest)
+ | _ -> raise (Invalid_URI uri))
+ | uri when is_cic_uri uri -> Cic_uri (Cic (Pcre.replace ~pat:"^cic:" uri))
+ | uri when is_nuprl_uri uri -> Nuprl_uri (Pcre.replace ~pat:"^nuprl:" uri)
+ | uri when is_theory_uri uri ->
+ Cic_uri (Theory (Pcre.replace ~pat:"^theory:" uri))
+ | uri -> raise (Invalid_URI uri)
+
+let patch_xml line =
+ Pcre.replace
+ ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/"
+ (Lazy.force Http_getter_env.dtd_base_url))
+ ~templ:(sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri="
+ (Lazy.force Http_getter_env.my_own_url))
+ line
+let patch_xsl line =
+ let mk_patch_fun tag line =
+ Pcre.replace
+ ~pat:(sprintf "%s\\s+href=\"" tag)
+ ~templ:(sprintf "%s href=\"%s/getxslt?uri="
+ tag (Lazy.force Http_getter_env.my_own_url))
+ line
+ in
+ let (patch_import, patch_include) =
+ (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include")
+ in
+ patch_include (patch_import line)
+let patch_dtd line =
+ Pcre.replace
+ ~pat:"ENTITY (.*) SYSTEM\\s+\""
+ ~templ:(sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri="
+ (Lazy.force Http_getter_env.my_own_url))
+ line
+
+let pp_error s =
+ sprintf "<html><body>Http Getter error: %s</body></html>" s
+let pp_internal_error s =
+ sprintf "<html><body>Http Getter Internal error: %s</body></html>" s
+let pp_msg s = sprintf "<html><body>%s</body></html>" s
+let null_pp s = s
+
+let mk_return_fun pp_fun contype msg outchan =
+ Http_daemon.respond
+ ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan
+
+let return_html_error = mk_return_fun pp_error "text/html"
+let return_html_internal_error = mk_return_fun pp_internal_error "text/html"
+let return_html_msg = mk_return_fun pp_msg "text/html"
+let return_html_raw = mk_return_fun null_pp "text/html"
+let return_xml_raw = mk_return_fun null_pp "text/xml"
+let return_file
+ ~fname ?contype ?contenc ?(patch_fun = fun x -> x) ?(gunzip = false) outchan
+ =
+ let headers =
+ match (contype, contenc) with
+ | (Some t, Some e) -> ["Content-Encoding", e; "Content-Type", t]
+ | (Some t, None) -> ["Content-Type" , t]
+ | (None, Some e) -> ["Content-Encoding", e]
+ | (None, None) -> []
+ in
+ Http_daemon.send_basic_headers ~code:200 outchan;
+ Http_daemon.send_headers headers outchan;
+ Http_daemon.send_CRLF outchan;
+ if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to
+ it, compress the result and sent it to client *)
+ let (tmp1, tmp2) =
+ (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ())
+ in
+ Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname; (* gunzip to tmp1 *)
+ let new_file = open_out tmp2 in
+ Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *)
+ (fun line -> output_string new_file (patch_fun line ^ "\n"))
+ tmp1;
+ close_out new_file;
+ Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *)
+ Http_getter_misc.iter_file (* send tmp1 to client as is*)
+ (fun line -> output_string outchan (line ^ "\n"))
+ tmp1;
+ Sys.remove tmp1 (* rm tmp1 *)
+ end else (* no need to gunzip, apply patch_fun directly to file *)
+ Http_getter_misc.iter_file
+ (fun line -> output_string outchan (patch_fun line ^ "\n"))
+ fname
+;;
+let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Http_getter_types;;
+
+val string_of_ls_flag: ls_flag -> string
+val string_of_encoding: encoding -> string
+
+val is_cic_uri: string -> bool
+val is_nuprl_uri: string -> bool
+val is_rdf_uri: string -> bool
+val is_xsl_uri: string -> bool
+
+val uri_of_string: string -> uri
+
+val patch_xml : string -> string
+val patch_xsl : string -> string
+val patch_dtd : string -> string
+
+val return_html_error: string -> out_channel -> unit
+val return_html_internal_error: string -> out_channel -> unit
+ (** return an HTML HTTP response from the given string, embedding it in an
+ "H1" element of an HTML page; content-type is set to text/html *)
+val return_html_msg: string -> out_channel -> unit
+ (** return an HTTP response using given string as content; content-type is set
+ to text/html *)
+val return_html_raw: string -> out_channel -> unit
+ (** return an HTTP response using given string as content; content-type is set
+ to text/xml *)
+val return_xml_raw: string -> out_channel -> unit
+ (** return a bad request http response *)
+val return_400: string -> out_channel -> unit
+ (**
+ @param fname name of the file to be sent
+ @param contype Content-Type header value
+ @param contenc Content-Enconding header value
+ @param patch_fun function used to patch file contents
+ @param gunzip is meaningful only if a patch function is provided. If gunzip
+ is true patch_fun is applied to the uncompressed version of the file. The file
+ is then compressed again and send to client
+ @param outchan output channel over which sent file fname *)
+val return_file:
+ fname:string ->
+ ?contype:string -> ?contenc:string ->
+ ?patch_fun:(string -> string) -> ?gunzip:bool ->
+ out_channel ->
+ unit
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Printf;;
+
+let version = "0.3.0"
+let conffile = "http_getter.conf.xml"
+
+ (* TODO provide a better usage string *)
+let usage_string configuration =
+ sprintf
+"
+<html>
+ <head>
+ <title>HTTP Getter's help message</title>
+ </head>
+ <body>
+ <h1>HTTP Getter, version %s</h1>
+ <h2>Usage information</h2>
+ <p>
+ Usage: <kbd>http://hostname:getterport/</kbd><em>command</em>
+ </p>
+ <p>
+ Available commands:
+ </p>
+ <p>
+ <b><kbd>help</kbd></b><br />
+ display this help message
+ </p>
+ <p>
+ <b><kbd>getxml?uri=URI[&format=(normal|gz)][&patch_dtd=(yes|no)]</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>register?uri=URI&url=URL</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>resolve?uri=URI</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>getdtd?uri=URI[&patch_dtd=(yes|no)]</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>getxslt?uri=URI[&patch_dtd=(yes|no)]</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>list_servers</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>add_server?url=URL&position=POSITION</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>remove_server?position=POSITION</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>update</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>clean_cache</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>getalluris</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>getallrdfuris</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>ls?baseuri=URI&format=(txt|xml)</kbd></b><br />
+ </p>
+ <p>
+ <b><kbd>getempty</kbd></b><br />
+ </p>
+ <h2>Current configuration</h2>
+ <pre>%s</pre>
+ </body>
+</html>
+"
+ version configuration
+
+let empty_xml =
+"<?xml version=\"1.0\"?>
+<!DOCTYPE empty [
+ <!ELEMENT empty EMPTY>
+]>
+<empty />
+"
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+val version: string
+val conffile: string
+val empty_xml: string
+
+ (** @return an HTML usage string including configuration information passed as
+ input parameter *)
+val usage_string: string -> string
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+let debug = ref true
+
+(* invariant: if logfile is set, then logchan is set too *)
+let logfile = ref None
+let logchan = ref None
+
+let set_logfile f =
+ (match !logchan with None -> () | Some oc -> close_out oc);
+ match f with
+ | Some f ->
+ logfile := Some f;
+ logchan := Some (open_out f)
+ | None ->
+ logfile := None;
+ logchan := None
+
+let get_logfile () = !logfile
+
+let close_logfile () = set_logfile None
+
+let debug_print s =
+ let msg = "[HTTP-Getter] " ^ s in
+ if !debug then
+ match (!logfile, !logchan) with
+ | None, _ -> prerr_endline msg
+ | Some fname, Some oc ->
+ output_string oc msg;
+ flush oc
+ | Some _, None -> assert false
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+ (** enable/disable debugging messages *)
+val debug: bool ref
+
+ (** output a debugging message *)
+val debug_print: string -> unit
+
+ (** if set to Some fname, fname will be used as a logfile, otherwise stderr
+ * will be used *)
+val get_logfile: unit -> string option
+val set_logfile: string option -> unit
+val close_logfile: unit -> unit
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Printf
+open Pxp_document
+open Pxp_types
+open Pxp_yacc
+
+open Http_getter_types
+
+let version = Http_getter_const.version
+
+let servers_file = lazy (Helm_registry.get "getter.servers_file")
+let cic_dbm = lazy (Helm_registry.get "getter.cic_dbm")
+let nuprl_dbm = lazy (Helm_registry.get "getter.nuprl_dbm")
+let rdf_dbm = lazy (Helm_registry.get "getter.rdf_dbm")
+let xsl_dbm = lazy (Helm_registry.get "getter.xsl_dbm")
+let xml_index = lazy (Helm_registry.get "getter.xml_indexname")
+let rdf_index = lazy (Helm_registry.get "getter.rdf_indexname")
+let xsl_index = lazy (Helm_registry.get "getter.xsl_indexname")
+let cic_dir = lazy (Helm_registry.get "getter.cic_dir")
+let nuprl_dir = lazy (Helm_registry.get "getter.nuprl_dir")
+let rdf_dir = lazy (Helm_registry.get "getter.rdf_dir")
+let dtd_dir = lazy (Helm_registry.get "getter.dtd_dir")
+let dtd_base_url = lazy (Helm_registry.get "getter.dtd_base_url")
+let port = lazy (Helm_registry.get_int "getter.port")
+
+let _servers = ref None
+
+let servers =
+ lazy
+ (match !_servers with
+ | None -> failwith "Getter not yet initialized: servers not available"
+ | Some servers -> servers)
+
+let load_servers () =
+ let pos = ref (-1) in
+ List.rev (Http_getter_misc.fold_file
+ (fun line servers ->
+ if Http_getter_misc.is_blank_line line then
+ servers
+ else
+ (incr pos; (!pos, line) :: servers))
+ []
+ (Lazy.force servers_file))
+
+let reload_servers () = _servers := Some (load_servers ())
+
+let save_servers () =
+ let oc = open_out (Lazy.force servers_file) in
+ List.iter (fun (_,server) -> output_string oc (server ^ "\n"))
+ (Lazy.force servers);
+ close_out oc
+
+let host =
+ lazy
+ (let buf = Buffer.create 20 in
+ Shell.call ~stdout:(Shell.to_buffer buf) [Shell.cmd "hostname" ["-f"]];
+ Pcre.replace ~pat:"\n+$" (Buffer.contents buf))
+
+let my_own_url =
+ lazy
+ (let (host, port) = (Lazy.force host, Lazy.force port) in
+ sprintf "http://%s%s" (* without trailing '/' *)
+ host (if port = 80 then "" else (sprintf ":%d" port)))
+
+let cache_mode =
+ lazy
+ (match String.lowercase (Helm_registry.get "getter.cache_mode") with
+ | "normal" -> Enc_normal
+ | "gz" -> Enc_gzipped
+ | mode -> failwith ("Invalid cache mode: " ^ mode))
+
+let reload () = reload_servers ()
+
+let env_to_string () =
+ sprintf
+"HTTP Getter %s (the OCaml one!)
+
+cic_dbm:\t%s
+nuprl_dbm:\t%s
+rdf_dbm:\t%s
+xsl_dbm:\t%s
+xml_index:\t%s
+rdf_index:\t%s
+xsl_index:\t%s
+cic_dir:\t%s
+nuprl_dir:\t%s
+rdf_dir:\t%s
+dtd_dir:\t%s
+servers_file:\t%s
+host:\t\t%s
+port:\t\t%d
+my_own_url:\t%s
+dtd_base_url:\t%s
+cache_mode:\t%s
+servers:
+\t%s
+"
+ version (Lazy.force cic_dbm) (Lazy.force nuprl_dbm) (Lazy.force rdf_dbm)
+ (Lazy.force xsl_dbm) (Lazy.force xml_index)
+ (Lazy.force rdf_index) (Lazy.force xsl_index) (Lazy.force cic_dir)
+ (Lazy.force nuprl_dir) (Lazy.force rdf_dir)
+ (Lazy.force dtd_dir) (Lazy.force servers_file) (Lazy.force host)
+ (Lazy.force port) (Lazy.force my_own_url)
+ (Lazy.force dtd_base_url)
+ (match Lazy.force cache_mode with Enc_normal -> "Normal" | Enc_gzipped -> "GZipped")
+ (String.concat "\n\t" (* (position * server) list *)
+ (List.map (fun (pos, server) -> sprintf "%3d: %s" pos server)
+ (Lazy.force servers)))
+
+let add_server ?position url =
+ let new_servers =
+ let servers = Lazy.force servers in
+ match position with
+ | None -> servers @ [-1, url];
+ | Some p when p > 0 ->
+ let rec add_after pos = function
+ | [] -> [-1, url]
+ | hd :: tl when p = 1 -> hd :: (-1, url) :: tl
+ | hd :: tl (* when p > 1 *) -> hd :: (add_after (pos - 1) tl)
+ in
+ add_after p servers
+ | Some _ -> assert false
+ in
+ _servers := Some new_servers;
+ save_servers ();
+ reload_servers ()
+
+let remove_server position =
+ _servers := Some (List.remove_assoc position (Lazy.force servers));
+ save_servers ();
+ reload_servers ()
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Http_getter_types;;
+
+ (* {2 general information} *)
+
+val version : string (* getter version *)
+
+ (* {2 environment gathered data} *)
+
+val cic_dbm : string lazy_t (* XML map DBM file for CIC *)
+val nuprl_dbm : string lazy_t (* XML map DBM file for NuPRL *)
+val rdf_dbm : string lazy_t (* RDF map DBM file *)
+val xsl_dbm : string lazy_t (* XSL map DBM file *)
+val xml_index : string lazy_t (* XMLs' index *)
+val rdf_index : string lazy_t (* RDFs' index *)
+val xsl_index : string lazy_t (* XSLTs' index *)
+val cic_dir : string lazy_t (* XMLs' directory for CIC*)
+val nuprl_dir : string lazy_t (* XMLs' directory for NuPRL*)
+val rdf_dir : string lazy_t (* RDFs' directory *)
+val dtd_dir : string lazy_t (* DTDs' root directory *)
+val servers_file : string lazy_t (* servers.txt file *)
+val port : int lazy_t (* port on which getter listens *)
+val dtd_base_url : string lazy_t (* base URL for DTD downloading *)
+
+ (* {2 derived data} *)
+
+val host : string lazy_t (* host on which getter listens *)
+val my_own_url : string lazy_t (* URL at which contact getter *)
+val servers : (int * string) list lazy_t
+ (* (position * server) list *)
+val cache_mode : encoding lazy_t (* cached files encoding *)
+
+ (* {2 dynamic configuration changes} *)
+
+ (* add a server to servers list in a given position (defaults to "after the
+ last server", change servers file accordingly and reload servers list *)
+val add_server: ?position:int -> string -> unit
+ (* remove a server from servers list, change servers file accordingly and
+ reload servers list *)
+val remove_server: int -> unit
+
+ (* {2 misc} *)
+
+val reload: unit -> unit (* reload configuration information *)
+val env_to_string : unit -> string (* dump a textual representation of the
+ current http_getter settings on an output
+ channel *)
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+exception Key_already_in of string;;
+exception Key_not_found of string;;
+
+class map dbname =
+ let perm = 420 in (* permission 644 in decimal notation *)
+ let open_dbm () = Dbm.opendbm dbname [ Dbm.Dbm_rdwr; Dbm.Dbm_create ] perm in
+
+ object (self)
+
+ inherit ThreadSafe.threadSafe
+
+ val mutable db = open_dbm ()
+
+ (*initializer Gc.finalise Dbm.close db (* close db on GC *)*)
+
+ method add key value =
+ self#doWriter (lazy (
+ try
+ Dbm.add db key value
+ with Dbm.Dbm_error "Entry already exists" -> raise (Key_already_in key)
+ ))
+
+ method replace key value =
+ self#doWriter (lazy (
+ Dbm.replace db key value
+ ))
+
+ method remove key =
+ self#doWriter (lazy (
+ try
+ Dbm.remove db key
+ with Dbm.Dbm_error "dbm_delete" -> raise (Key_not_found key)
+ ))
+
+ method resolve key =
+ self#doReader (lazy (
+ try Dbm.find db key with Not_found -> raise (Key_not_found key)
+ ))
+
+ method iter (f: string -> string -> unit) =
+ self#doReader (lazy (
+ Dbm.iter f db
+ ))
+
+ method sync =
+ self#doWriter (lazy (
+ Dbm.close db;
+ db <- open_dbm ()
+ ))
+
+ method clear =
+ self#doWriter (lazy (
+ Dbm.close db;
+ List.iter
+ (fun ext ->
+ let file = dbname ^ ext in
+ if Sys.file_exists file then Sys.remove file)
+ [".dir"; ".pag"; ".db"];
+ db <- open_dbm ()
+ ))
+
+ method close =
+ self#doWriter (lazy (
+ Dbm.close db
+ ))
+
+ end
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+exception Key_already_in of string
+exception Key_not_found of string
+
+class map:
+ string ->
+ object
+ method add: string -> string -> unit
+ method replace: string -> string -> unit
+ method remove: string -> unit
+ method resolve: string -> string
+ method iter: (string -> string -> unit) -> unit
+ method sync: unit
+ method clear: unit
+
+ method close: unit (* use with caution! *)
+ end
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+open Printf
+
+open Http_getter_debugger
+
+let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
+let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
+let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
+let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
+let dir_sep_RE = Pcre.regexp "/"
+let heading_slash_RE = Pcre.regexp "^/"
+
+let bufsiz = 16384 (* for file system I/O *)
+let tcp_bufsiz = 4096 (* for TCP I/O *)
+
+let fold_file f init fname =
+ let ic = open_in fname in
+ let rec aux acc =
+ let line = try Some (input_line ic) with End_of_file -> None in
+ match line with
+ | None -> acc
+ | Some line -> aux (f line acc)
+ in
+ let res = try aux init with e -> close_in ic; raise e in
+ close_in ic;
+ res
+
+let iter_file f = fold_file (fun line _ -> f line) ()
+
+let hashtbl_sorted_fold f tbl init =
+ let sorted_keys =
+ List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
+ in
+ List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys
+
+let hashtbl_sorted_iter f tbl =
+ let sorted_keys =
+ List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
+ in
+ List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys
+
+let cp src dst =
+ let (ic, oc) = (open_in src, open_out dst) in
+ let buf = String.create bufsiz in
+ (try
+ while true do
+ let bytes = input ic buf 0 bufsiz in
+ if bytes = 0 then raise End_of_file else output oc buf 0 bytes
+ done
+ with End_of_file -> ());
+ close_in ic; close_out oc
+
+let parse_url url =
+ try
+ let subs =
+ Pcre.extract ~rex:url_RE (Pcre.replace ~rex:http_scheme_RE url)
+ in
+ (subs.(1),
+ (if subs.(2) = "" then 80 else int_of_string subs.(3)),
+ (if subs.(4) = "" then "/" else subs.(4)))
+ with exc ->
+ failwith
+ (sprintf "Can't parse url: %s (exception: %s)"
+ url (Printexc.to_string exc))
+let init_socket addr port =
+ let inet_addr = (Unix.gethostbyname addr).Unix.h_addr_list.(0) in
+ let sockaddr = Unix.ADDR_INET (inet_addr, port) in
+ let suck = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
+ Unix.connect suck sockaddr;
+ let outchan = Unix.out_channel_of_descr suck in
+ let inchan = Unix.in_channel_of_descr suck in
+ (inchan, outchan)
+let http_get_iter_buf ~callback url =
+ let (address, port, path) = parse_url url in
+ let buf = String.create tcp_bufsiz in
+ let (inchan, outchan) = init_socket address port in
+ output_string outchan (sprintf "GET %s\r\n" path);
+ flush outchan;
+ (try
+ while true do
+ match input inchan buf 0 tcp_bufsiz with
+ | 0 -> raise End_of_file
+ | bytes when bytes = tcp_bufsiz -> (* buffer full, no need to slice it *)
+ callback buf
+ | bytes when bytes < tcp_bufsiz -> (* buffer not full, slice it *)
+ callback (String.sub buf 0 bytes)
+ | _ -> (* ( bytes < 0 ) || ( bytes > tcp_bufsiz ) *)
+ assert false
+ done
+ with End_of_file -> ());
+ close_in inchan (* close also outchan, same fd *)
+
+let wget ?output url =
+ debug_print
+ (sprintf "wgetting %s (output: %s)" url
+ (match output with None -> "default" | Some f -> f));
+ match url with
+ | url when Pcre.pmatch ~rex:file_scheme_RE url -> (* file:// *)
+ (let src_fname = Pcre.replace ~rex:file_scheme_RE url in
+ match output with
+ | Some dst_fname -> cp src_fname dst_fname
+ | None ->
+ let dst_fname = Filename.basename src_fname in
+ if src_fname <> dst_fname then
+ cp src_fname dst_fname
+ else (* src and dst are the same: do nothing *)
+ ())
+ | url when Pcre.pmatch ~rex:http_scheme_RE url -> (* http:// *)
+ (let oc =
+ open_out (match output with Some f -> f | None -> Filename.basename url)
+ in
+ http_get_iter_buf ~callback:(fun data -> output_string oc data) url;
+ close_out oc)
+ | scheme -> (* unsupported scheme *)
+ failwith ("Http_getter_misc.wget: unsupported scheme: " ^ scheme)
+
+let gzip ?(keep = false) ?output fname =
+ let output = match output with None -> fname ^ ".gz" | Some fname -> fname in
+ debug_print (sprintf "gzipping %s (keep: %b, output: %s)" fname keep output);
+ let (ic, oc) = (open_in fname, Gzip.open_out output) in
+ let buf = String.create bufsiz in
+ (try
+ while true do
+ let bytes = input ic buf 0 bufsiz in
+ if bytes = 0 then raise End_of_file else Gzip.output oc buf 0 bytes
+ done
+ with End_of_file -> ());
+ close_in ic; Gzip.close_out oc;
+ if not keep then Sys.remove fname
+;;
+
+let gunzip ?(keep = false) ?output fname =
+ (* assumption: given file name ends with ".gz" or output is set *)
+ let output =
+ match output with
+ | None ->
+ if (Pcre.pmatch ~rex:trailing_dot_gz_RE fname) then
+ Pcre.replace ~rex:trailing_dot_gz_RE fname
+ else
+ failwith
+ "Http_getter_misc.gunzip: unable to determine output file name"
+ | Some fname -> fname
+ in
+ debug_print (sprintf "gunzipping %s (keep: %b, output: %s)"
+ fname keep output);
+ let (ic, oc) = (Gzip.open_in fname, open_out output) in
+ let buf = String.create bufsiz in
+ (try
+ while true do
+ let bytes = Gzip.input ic buf 0 bufsiz in
+ if bytes = 0 then raise End_of_file else Pervasives.output oc buf 0 bytes
+ done
+ with End_of_file -> ());
+ Gzip.close_in ic; close_out oc;
+ if not keep then Sys.remove fname
+;;
+
+let tempfile () = Filename.temp_file "http_getter_" ""
+
+exception Mkdir_failure of string * string;; (* dirname, failure reason *)
+let dir_perm = 0o755
+
+let mkdir ?(parents = false) dirname =
+ let mkdirhier () =
+ let (pieces, hd) =
+ let split = Pcre.split ~rex:dir_sep_RE dirname in
+ if Pcre.pmatch ~rex:heading_slash_RE dirname then
+ (List.tl split, "/")
+ else
+ (split, "")
+ in
+ ignore
+ (List.fold_left
+ (fun pre dir ->
+ let next_dir =
+ sprintf "%s%s%s" pre (match pre with "/" | "" -> "" | _ -> "/") dir
+ in
+ (try
+ (match (Unix.stat next_dir).Unix.st_kind with
+ | Unix.S_DIR -> () (* dir component already exists, go on! *)
+ | _ -> (* dir component already exists but isn't a dir, abort! *)
+ raise
+ (Mkdir_failure (dirname,
+ sprintf "'%s' already exists but is not a dir" next_dir)))
+ with Unix.Unix_error (Unix.ENOENT, "stat", _) ->
+ (* dir component doesn't exists, create it and go on! *)
+ Unix.mkdir next_dir dir_perm);
+ next_dir)
+ hd pieces)
+ in
+ if parents then mkdirhier () else Unix.mkdir dirname dir_perm
+
+let string_of_proc_status = function
+ | Unix.WEXITED code -> sprintf "[Exited: %d]" code
+ | Unix.WSIGNALED sg -> sprintf "[Killed: %d]" sg
+ | Unix.WSTOPPED sg -> sprintf "[Stopped: %d]" sg
+
+let http_get url =
+ if Pcre.pmatch ~rex:file_scheme_RE url then begin
+ (* file:// URL. Read data from file system *)
+ let fname = Pcre.replace ~rex:file_scheme_RE url in
+ try
+ let size = (Unix.stat fname).Unix.st_size in
+ let buf = String.create size in
+ let ic = open_in fname in
+ really_input ic buf 0 size;
+ close_in ic;
+ Some buf
+ with Unix.Unix_error (Unix.ENOENT, "stat", _) -> None
+ end else (* other URL, pass it to Http_client *)
+ try
+ Some (Http_client.http_get url)
+ with e ->
+ prerr_endline (sprintf
+ "Warning: Http_client failed on url %s with exception: %s"
+ url (Printexc.to_string e));
+ None
+
+let is_blank_line =
+ let blank_line_RE = Pcre.regexp "(^#)|(^\\s*$)" in
+ fun line ->
+ Pcre.pmatch ~rex:blank_line_RE line
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+ (** 'mkdir' failed, arguments are: name of the directory to be created and
+ failure reason *)
+exception Mkdir_failure of string * string
+
+ (** "fold_left" like function on file lines, trailing newline is not passed to
+ the given function *)
+val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a
+ (* "iter" like function on file lines, trailing newline is not passed to the
+ given function *)
+val iter_file : (string -> unit) -> string -> unit
+
+ (** like Hashtbl.fold but keys are processed ordered *)
+val hashtbl_sorted_fold :
+ ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c
+ (** like Hashtbl.iter but keys are processed ordered *)
+val hashtbl_sorted_iter : ('a -> 'b -> unit) -> ('a, 'b) Hashtbl.t -> unit
+
+ (** cp frontend *)
+val cp: string -> string -> unit
+ (** wget frontend, if output is given it is the destination file, otherwise
+ standard wget rules are used. Additionally this function support also the
+ "file://" scheme for file system addressing *)
+val wget: ?output: string -> string -> unit
+ (** gzip frontend. If keep = true original file will be kept, default is
+ false. output is the file on which gzipped data will be saved, default is
+ given file with an added ".gz" suffix *)
+val gzip: ?keep: bool -> ?output: string -> string -> unit
+ (** gunzip frontend. If keep = true original file will be kept, default is
+ false. output is the file on which gunzipped data will be saved, default is
+ given file name without trailing ".gz" *)
+val gunzip: ?keep: bool -> ?output: string -> string -> unit
+ (** tempfile frontend, return the name of created file. A special purpose
+ suffix is used (actually "_http_getter" *)
+val tempfile: unit -> string
+ (** mkdir frontend, if parents = true also parent directories will be created.
+ If the given directory already exists doesn't act *)
+val mkdir: ?parents: bool -> string -> unit
+
+ (** pretty printer for Unix.process_status values *)
+val string_of_proc_status : Unix.process_status -> string
+
+ (** raw HTTP downloader, return Some the contents of downloaded resource or
+ None if an error occured while downloading. This function support also
+ "file://" scheme for filesystem resources *)
+val http_get: string -> string option
+ (** 'iter' like method that iter over string slices (unspecified length) of a
+ remote resources fetched via HTTP GET requests *)
+val http_get_iter_buf: callback:(string -> unit) -> string -> unit
+
+ (** true on blanks-only and #-commented lines, false otherwise *)
+val is_blank_line: string -> bool
+
--- /dev/null
+(*
+ * Copyright (C) 2003-2004:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+exception Bad_request of string
+exception Unresolvable_URI of string
+exception Invalid_URI of string
+exception Invalid_URL of string
+exception Invalid_RDF_class of string
+exception Internal_error of string
+
+type encoding = Enc_normal | Enc_gzipped
+type answer_format = Fmt_text | Fmt_xml
+type ls_flag = Yes | No | Ann
+type ls_object =
+ {
+ uri: string;
+ ann: bool;
+ types: ls_flag;
+ body: ls_flag;
+ proof_tree: ls_flag;
+ }
+type ls_item =
+ | Ls_section of string
+ | Ls_object of ls_object
+
+type xml_uri =
+ | Cic of string
+ | Theory of string
+type rdf_uri = string * xml_uri
+type nuprl_uri = string
+type uri =
+ | Cic_uri of xml_uri
+ | Nuprl_uri of nuprl_uri
+ | Rdf_uri of rdf_uri
+
+module StringSet = Set.Make (String)
+