From c10721e8910fc80786cf668d0f843e3da2598a21 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Wed, 11 Feb 2004 17:17:26 +0000 Subject: [PATCH] getter's revolution (now uses backend in ocaml/getter) --- helm/http_getter/.depend | 40 --- helm/http_getter/Makefile | 76 +---- helm/http_getter/NEWS | 7 + helm/http_getter/http_getter.ml | 329 ---------------------- helm/http_getter/http_getter.mli | 52 ---- helm/http_getter/http_getter_cache.ml | 211 -------------- helm/http_getter/http_getter_cache.mli | 44 --- helm/http_getter/http_getter_common.ml | 137 --------- helm/http_getter/http_getter_common.mli | 73 ----- helm/http_getter/http_getter_const.ml | 111 -------- helm/http_getter/http_getter_const.mli | 36 --- helm/http_getter/http_getter_debugger.ml | 58 ---- helm/http_getter/http_getter_debugger.mli | 40 --- helm/http_getter/http_getter_env.ml | 193 ------------- helm/http_getter/http_getter_env.mli | 77 ----- helm/http_getter/http_getter_map.ml | 96 ------- helm/http_getter/http_getter_map.mli | 44 --- helm/http_getter/http_getter_misc.ml | 250 ---------------- helm/http_getter/http_getter_misc.mli | 80 ------ helm/http_getter/http_getter_types.ml | 62 ---- helm/http_getter/main.ml | 23 +- helm/http_getter/threadSafe.ml | 98 ------- helm/http_getter/threadSafe.mli | 44 --- 23 files changed, 28 insertions(+), 2153 deletions(-) delete mode 100644 helm/http_getter/.depend delete mode 100644 helm/http_getter/http_getter.ml delete mode 100644 helm/http_getter/http_getter.mli delete mode 100644 helm/http_getter/http_getter_cache.ml delete mode 100644 helm/http_getter/http_getter_cache.mli delete mode 100644 helm/http_getter/http_getter_common.ml delete mode 100644 helm/http_getter/http_getter_common.mli delete mode 100644 helm/http_getter/http_getter_const.ml delete mode 100644 helm/http_getter/http_getter_const.mli delete mode 100644 helm/http_getter/http_getter_debugger.ml delete mode 100644 helm/http_getter/http_getter_debugger.mli delete mode 100644 helm/http_getter/http_getter_env.ml delete mode 100644 helm/http_getter/http_getter_env.mli delete mode 100644 helm/http_getter/http_getter_map.ml delete mode 100644 helm/http_getter/http_getter_map.mli delete mode 100644 helm/http_getter/http_getter_misc.ml delete mode 100644 helm/http_getter/http_getter_misc.mli delete mode 100644 helm/http_getter/http_getter_types.ml delete mode 100644 helm/http_getter/threadSafe.ml delete mode 100644 helm/http_getter/threadSafe.mli diff --git a/helm/http_getter/.depend b/helm/http_getter/.depend deleted file mode 100644 index 987d8acf5..000000000 --- a/helm/http_getter/.depend +++ /dev/null @@ -1,40 +0,0 @@ -http_getter_cache.cmo: http_getter_common.cmi http_getter_debugger.cmi \ - http_getter_env.cmi http_getter_misc.cmi http_getter_types.cmo \ - threadSafe.cmi http_getter_cache.cmi -http_getter_cache.cmx: http_getter_common.cmx http_getter_debugger.cmx \ - http_getter_env.cmx http_getter_misc.cmx http_getter_types.cmx \ - threadSafe.cmx http_getter_cache.cmi -http_getter_common.cmo: http_getter_env.cmi http_getter_misc.cmi \ - http_getter_types.cmo http_getter_common.cmi -http_getter_common.cmx: http_getter_env.cmx http_getter_misc.cmx \ - http_getter_types.cmx http_getter_common.cmi -http_getter_const.cmo: http_getter_const.cmi -http_getter_const.cmx: http_getter_const.cmi -http_getter_debugger.cmo: http_getter_debugger.cmi -http_getter_debugger.cmx: http_getter_debugger.cmi -http_getter_env.cmo: http_getter_const.cmi http_getter_misc.cmi \ - http_getter_types.cmo http_getter_env.cmi -http_getter_env.cmx: http_getter_const.cmx http_getter_misc.cmx \ - http_getter_types.cmx http_getter_env.cmi -http_getter_map.cmo: threadSafe.cmi http_getter_map.cmi -http_getter_map.cmx: threadSafe.cmx http_getter_map.cmi -http_getter_misc.cmo: http_getter_debugger.cmi http_getter_misc.cmi -http_getter_misc.cmx: http_getter_debugger.cmx http_getter_misc.cmi -http_getter.cmo: http_getter.cmi http_getter_common.cmi http_getter_const.cmi \ - http_getter_env.cmi http_getter_map.cmi http_getter_types.cmo \ - http_getter.cmi -http_getter.cmx: http_getter.cmx http_getter_common.cmx http_getter_const.cmx \ - http_getter_env.cmx http_getter_map.cmx http_getter_types.cmx \ - http_getter.cmi -main.cmo: http_getter.cmi http_getter_cache.cmi http_getter_common.cmi \ - http_getter_const.cmi http_getter_debugger.cmi http_getter_env.cmi \ - http_getter_map.cmi http_getter_misc.cmi http_getter_types.cmo -main.cmx: http_getter.cmx http_getter_cache.cmx http_getter_common.cmx \ - http_getter_const.cmx http_getter_debugger.cmx http_getter_env.cmx \ - http_getter_map.cmx http_getter_misc.cmx http_getter_types.cmx -threadSafe.cmo: http_getter_debugger.cmi threadSafe.cmi -threadSafe.cmx: http_getter_debugger.cmx threadSafe.cmi -http_getter_cache.cmi: http_getter_types.cmo -http_getter_common.cmi: http_getter_types.cmo -http_getter_env.cmi: http_getter_types.cmo -http_getter.cmi: http_getter_map.cmi diff --git a/helm/http_getter/Makefile b/helm/http_getter/Makefile index 13e939b06..bf0c96ac8 100644 --- a/helm/http_getter/Makefile +++ b/helm/http_getter/Makefile @@ -1,89 +1,25 @@ VERSION = 0.3.0 NAME = http_getter -DISTDIR = http-getter-$(VERSION) -EXTRA_DIST = AUTHORS COPYING NEWS README BUGS -DOCS = doc/http_getter.conf.xml.sample - -REQUIRES = \ - http dbm pcre pxp shell threads zip \ - helm-logger +REQUIRES = helm-getter helm-logger helm-registry COMMONOPTS = -package "$(REQUIRES)" -pp camlp4o OCAMLFIND = ocamlfind -OCAMLC = $(OCAMLFIND) ocamlc -thread $(COMMONOPTS) -OCAMLOPT = $(OCAMLFIND) ocamlopt -thread $(COMMONOPTS) -OCAMLDEP = $(OCAMLFIND) ocamldep $(COMMONOPTS) -OCAMLDOC = \ - ocamldoc \ - $(shell $(OCAMLFIND) query -i-format http) \ - $(shell $(OCAMLFIND) query -i-format dbm) \ - $(shell $(OCAMLFIND) query -i-format pcre) \ - $(shell $(OCAMLFIND) query -i-format pxp) \ - $(shell $(OCAMLFIND) query -i-format shell) \ - $(shell $(OCAMLFIND) query -i-format threads) \ - $(shell $(OCAMLFIND) query -i-format zip) -MODULES = \ - http_getter_debugger threadSafe \ - http_getter_types http_getter_misc http_getter_const \ - http_getter_env http_getter_common http_getter_map \ - http_getter_cache http_getter - -OBJS = $(patsubst %,%.cmo,$(MODULES)) -OBJSOPT = $(patsubst %,%.cmx,$(MODULES)) +OCAMLC = $(OCAMLFIND) ocamlc -g $(COMMONOPTS) +OCAMLOPT = $(OCAMLFIND) opt $(COMMONOPTS) all: byte byte: $(NAME) opt: $(NAME).opt world: byte opt -include .depend -depend: - $(OCAMLDEP) *.ml *.mli > .depend - -%.cmi: %.mli - $(OCAMLC) -c $< -%.cmo: %.ml %.cmi - $(OCAMLC) -c $< -%.cmx: %.ml %.cmi - $(OCAMLOPT) -c $< -include Makefile.overrides -$(NAME).cmo: $(NAME).ml - $(OCAMLC) -c $< -$(NAME).cmx: $(NAME).ml - $(OCAMLOPT) -c $< -$(NAME): $(OBJS) main.ml +$(NAME): main.ml $(OCAMLC) -linkpkg -thread -o $@ $^ -$(NAME).opt: $(OBJSOPT) main.ml +$(NAME).opt: main.ml $(OCAMLOPT) -linkpkg -thread -o $@ $^ -http_getter.dot: *.ml *.mli - $(OCAMLDOC) -dot -o $@ $^ - distclean: clean clean: rm -f *.cm[aiox] *.o $(NAME){,.opt} *.dot -dist: distclean depend - if [ -d $(DISTDIR) ]; then rm -rf $(DISTDIR); else true; fi - mkdir $(DISTDIR)/ - mkdir $(DISTDIR)/doc - cp $(DOCS) $(DISTDIR)/doc/ - for m in $(patsubst %, %.mli, $(MODULES)); do \ - if [ "$$m" != "http_getter_types.mli" ]; then \ - cp $$m $(DISTDIR)/; \ - fi; \ - done - cp \ - $(patsubst %, %.ml, $(MODULES)) \ - $(NAME).ml \ - Makefile Makefile.overrides .depend \ - $(DISTDIR)/ - cp $(EXTRA_DIST) $(DISTDIR)/ - tar cvzf $(DISTDIR).tar.gz $(DISTDIR)/ - rm -rf $(DISTDIR)/ -distcheck: dist - tar xvzf $(DISTDIR).tar.gz - cd $(DISTDIR); make - rm -rf $(DISTDIR) -.PHONY: all byte dist distcheck opt world depend clean distclean +.PHONY: all byte opt world clean distclean diff --git a/helm/http_getter/NEWS b/helm/http_getter/NEWS index cbb967e21..aaf0047a2 100644 --- a/helm/http_getter/NEWS +++ b/helm/http_getter/NEWS @@ -1,4 +1,11 @@ +09/02/2004 + + Split getter in backend and frontend. Frontend is the only remaining file here + (namely main.ml). Backend is shipped in the helm-http_getter library (look in + the HELM cvs, dir ocaml/getter/). + -- Zack + 08/01/2003 Fully OCaml HTTP Getter is now available: go and burn the Perl implementation! diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml deleted file mode 100644 index 32c4882df..000000000 --- a/helm/http_getter/http_getter.ml +++ /dev/null @@ -1,329 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Printf - -open Http_getter_common -open Http_getter_misc -open Http_getter_debugger -open Http_getter_types - -let (index_line_sep_RE, index_sep_RE, trailing_types_RE, - heading_cic_RE, heading_theory_RE, heading_nuprl_RE, - heading_rdf_cic_RE, heading_rdf_theory_RE) = - (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n", - Pcre.regexp "\\.types$", - Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:", - Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") - - (* global maps, shared by all threads *) - -let cic_map = new Http_getter_map.map Http_getter_env.cic_dbm -let nuprl_map = new Http_getter_map.map Http_getter_env.nuprl_dbm -let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm -let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm - -let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] -let close_maps () = List.iter (fun m -> m#close) maps -let clear_maps () = List.iter (fun m -> m#clear) maps -let sync_maps () = List.iter (fun m -> m#sync) maps - -let map_of_uri = function - | uri when is_cic_uri uri -> cic_map - | uri when is_nuprl_uri uri -> nuprl_map - | uri when is_rdf_uri uri -> rdf_map - | uri when is_xsl_uri uri -> xsl_map - | uri -> raise (Unresolvable_URI uri) - -let update_from_server logmsg server_url = (* use global maps *) - debug_print ("Updating information from " ^ server_url); - let xml_url_of_uri = function - (* TODO missing sanity checks on server_url, e.g. it can contains $1 *) - | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) -> - Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri - | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) -> - Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri - | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) -> - Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri - | uri -> raise (Invalid_URI uri) - in - let rdf_url_of_uri = function (* TODO as above *) - | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) -> - Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri - | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) -> - Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri - | uri -> raise (Invalid_URI uri) - in - let log = ref (`T ("Processing server: " ^ server_url) :: logmsg) in - let (xml_index, rdf_index, xsl_index) = - (* TODO keeps index in memory, is better to keep them on temp files? *) - (http_get (server_url ^ "/" ^ Http_getter_env.xml_index), - http_get (server_url ^ "/" ^ Http_getter_env.rdf_index), - http_get (server_url ^ "/" ^ Http_getter_env.xsl_index)) - in - if (xml_index = None && rdf_index = None && xsl_index = None) then - debug_print (sprintf "Warning: useless server %s" server_url); - (match xml_index with - | Some xml_index -> - (log := `T "Updating XML db ...
" :: !log; - List.iter - (function - | l when is_blank_line l -> () (* skip blank and commented lines *) - | l -> - (try - (match Pcre.split ~rex:index_line_sep_RE l with - | [uri; "gz"] -> - assert (is_cic_uri uri || is_nuprl_uri uri) ; - (map_of_uri uri)#replace - uri ((xml_url_of_uri uri) ^ ".xml.gz") - | [uri] -> - assert (is_cic_uri uri || is_nuprl_uri uri) ; - (map_of_uri uri)#replace - uri ((xml_url_of_uri uri) ^ ".xml") - | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log) - with Invalid_URI uri -> - log := `T ("Ignoring invalid XML URI: '" ^ l) :: !log)) - (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *) - log := `T "All done" :: !log) - | None -> ()); - (match rdf_index with - | Some rdf_index -> - (log := `T "Updating RDF db ..." :: !log; - List.iter - (fun l -> - try - (match Pcre.split ~rex:index_line_sep_RE l with - | [uri; "gz"] -> - rdf_map#replace uri - ((rdf_url_of_uri uri) ^ ".xml.gz") - | [uri] -> - rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml") - | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log) - with Invalid_URI uri -> - log := `T ("Ignoring invalid RDF URI: '" ^ l) :: !log) - (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *) - log := `T "All done" :: !log) - | None -> ()); - (match xsl_index with - | Some xsl_index -> - (log := `T "Updating XSLT db ..." :: !log; - List.iter - (fun l -> xsl_map#replace l (server_url ^ "/" ^ l)) - (Pcre.split ~rex:index_sep_RE xsl_index); - log := `T "All done" :: !log) - | None -> ()); - debug_print "done with this server"; - !log - -let update_from_all_servers () = (* use global maps *) - clear_maps (); - let log = - List.fold_left - update_from_server - [] (* initial logmsg: empty *) - (* reverse order: 1st server is the most important one *) - (List.map snd (List.rev (Http_getter_env.servers ()))) - in - sync_maps (); - `Msg (`L (List.rev log)) - -let update_from_one_server server_url = - let log = update_from_server [] server_url in - `Msg (`L (List.rev log)) - -let temp_file_of_uri uri = - let flat_string s s' c = - let cs = String.copy s in - for i = 0 to (String.length s) - 1 do - if String.contains s' s.[i] then cs.[i] <- c - done; - cs - in - let user = try Unix.getlogin () with _ -> "" in - Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') "" - -(* API *) - -let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ()) - -let resolve uri = - try - (map_of_uri uri)#resolve uri - with Http_getter_map.Key_not_found _ -> raise (Unresolvable_URI uri) - - (* Warning: this fail if uri is already registered *) -let register ~uri ~url = (map_of_uri uri)#add uri url - -let update () = update_from_all_servers () - -let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri = - let url = resolve uri in - let (fname, outchan) = temp_file_of_uri uri in - Http_getter_cache.respond_xml ~uri ~url ~enc:format ~patch:patch_dtd outchan; - close_out outchan; - fname - -let getxslt ?(patch_dtd = true) uri = - let url = resolve uri in - let (fname, outchan) = temp_file_of_uri uri in - Http_getter_cache.respond_xsl ~url ~patch:patch_dtd outchan; - close_out outchan; - fname - -let getdtd ?(patch_dtd = true) uri = - let url = Http_getter_env.dtd_dir ^ "/" ^ uri in - let (fname, outchan) = temp_file_of_uri uri in - Http_getter_cache.respond_dtd ~url ~patch:patch_dtd outchan; - close_out outchan; - fname - -let clean_cache () = Http_getter_cache.clean () - -let list_servers () = Http_getter_env.servers () - -let add_server ?(position = 0) name = - if position = 0 then begin - Http_getter_env.add_server ~position:0 name; - update_from_one_server name (* quick update (new server only) *) - end else if position > 0 then begin - Http_getter_env.add_server ~position name; - update () - end else (* already checked bt parse_position *) - assert false - -let remove_server position = - let server_name = - try - List.assoc position (Http_getter_env.servers ()) - with Not_found -> - raise (Invalid_argument (sprintf "no server with position %d" position)) - in - Http_getter_env.remove_server position; - update () - -let return_uris map filter = - let uris = ref [] in - map#iter (fun uri _ -> if filter uri then uris := uri :: !uris); - List.rev !uris - -let getalluris () = - let filter uri = - (Pcre.pmatch ~rex:heading_cic_RE uri) && - not (Pcre.pmatch ~rex:trailing_types_RE uri) - in - return_uris cic_map filter - -let getallrdfuris classs = - let filter = - let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in - match classs with - | `Forward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "forward") uri) - | `Backward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "backward") uri) - in - return_uris rdf_map filter - -let ls = - let (++) (oldann, oldtypes, oldbody, oldtree) - (newann, newtypes, newbody, newtree) = - ((if newann > oldann then newann else oldann), - (if newtypes > oldtypes then newtypes else oldtypes), - (if newbody > oldbody then newbody else oldbody), - (if newtree > oldtree then newtree else oldtree)) - in - let basepart_RE = - Pcre.regexp - "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" - in - let (types_RE, types_ann_RE, body_RE, body_ann_RE, - proof_tree_RE, proof_tree_ann_RE) = - (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", - Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", - Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$") - in - let (slash_RE, til_slash_RE, no_slashes_RE) = - (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") - in - fun lsuri -> - let pat = - "^" ^ - (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) - in - let (dir_RE, obj_RE) = - (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)")) - in - let dirs = ref StringSet.empty in - let objs = Hashtbl.create 17 in - let store_dir d = - dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs - in - let store_obj o = - let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in - let no_flags = false, No, No, No in - let oldflags = - try - Hashtbl.find objs basepart - with Not_found -> (* no ann, no types, no body, no proof tree *) - no_flags - in - let newflags = - match o with - | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) - | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) - | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) - | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) - | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) - | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) - | s -> no_flags - in - Hashtbl.replace objs basepart (oldflags ++ newflags) - in - cic_map#iter - (* BLEARGH Dbm module lacks support for fold-like functions *) - (fun key _ -> - match key with - | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *) - let localpart = Pcre.replace ~rex:dir_RE uri in - if Pcre.pmatch ~rex:no_slashes_RE localpart then - store_obj localpart - else - store_dir localpart - | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *) - store_obj (Pcre.replace ~rex:til_slash_RE uri) - | uri -> () (* miss *)); - let ls_items = ref [] in - StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; - Http_getter_misc.hashtbl_sorted_iter - (fun uri (annflag, typesflag, bodyflag, treeflag) -> - ls_items := - Ls_object { - uri = uri; ann = annflag; - types = typesflag; body = typesflag; proof_tree = treeflag - } :: !ls_items) - objs; - List.rev !ls_items - diff --git a/helm/http_getter/http_getter.mli b/helm/http_getter/http_getter.mli deleted file mode 100644 index a559917b9..000000000 --- a/helm/http_getter/http_getter.mli +++ /dev/null @@ -1,52 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Http_getter_types - - (** {2 Getter Web Service interface as API *) - -val help: unit -> string -val resolve: string -> string (* uri -> url *) -val register: uri:string -> url:string -> unit -val update: unit -> Ui_logger.html_msg -val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string -val getxslt : ?patch_dtd:bool -> string -> string -val getdtd : ?patch_dtd:bool -> string -> string -val clean_cache: unit -> unit -val list_servers: unit -> (int * string) list -val add_server: ?position:int -> string -> Ui_logger.html_msg -val remove_server: int -> Ui_logger.html_msg -val getalluris: unit -> string list -val getallrdfuris: [ `Forward | `Backward ] -> string list -val ls: xml_uri -> ls_item list - - (** {2 Misc} *) - -val close_maps: unit -> unit -val update_from_one_server: string -> Ui_logger.html_msg - diff --git a/helm/http_getter/http_getter_cache.ml b/helm/http_getter/http_getter_cache.ml deleted file mode 100644 index 8f5bc2312..000000000 --- a/helm/http_getter/http_getter_cache.ml +++ /dev/null @@ -1,211 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -(* TODO cache expires control!!! *) -(* TODO uwobo loop: - if two large proof (already in cache) are requested at the same time by two - clients, uwobo (java implementation, not yet tested with the OCaml one) - starts looping sending output to one of the client *) - -open Http_getter_common;; -open Http_getter_debugger;; -open Http_getter_misc;; -open Http_getter_types;; -open Printf;; - - (* expose ThreadSafe.threadSafe methods *) -class threadSafe = - object - inherit ThreadSafe.threadSafe - method virtual doCritical : 'a. 'a lazy_t -> 'a - method virtual doReader : 'a. 'a lazy_t -> 'a - method virtual doWriter : 'a. 'a lazy_t -> 'a - end -;; -let threadSafe = new threadSafe ;; - -let resource_type_of_url = function - | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped - | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal - | url -> raise (Invalid_URL url) -let extension_of_resource_type = function - | Enc_normal -> "xml" - | Enc_gzipped -> "xml.gz" - - (* basename = resource name without trailing ".gz", if any *) -let is_in_cache basename = - Sys.file_exists - (match Http_getter_env.cache_mode with - | Enc_normal -> basename - | Enc_gzipped -> basename ^ ".gz") - -let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = - let resource_type = resource_type_of_url url in - let extension = extension_of_resource_type resource_type in - let downloadname = - match uri_of_string uri with (* parse uri *) - | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) -> - (* assumption: baseuri starts with "/" *) - sprintf "%s%s.%s" Http_getter_env.cic_dir baseuri extension - | Nuprl_uri baseuri -> - (* assumption: baseuri starts with "/" *) - sprintf "%s%s.%s" Http_getter_env.nuprl_dir baseuri extension - | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri)) - | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) -> - let escaped_prefix = - (Pcre.replace ~pat:"/" ~templ:"_" - (Pcre.replace ~pat:"_" ~templ:"__" - (prefix ^ - (match qbaseuri with - | Cic _ -> "//cic:" - | Theory _ -> "//theory:")))) - in - sprintf "%s/%s%s.%s" - Http_getter_env.rdf_dir escaped_prefix baseuri extension - in - let patch_fun = - if patch then Http_getter_common.patch_xml else (fun x -> x) - in - let basename = Pcre.replace ~pat:"\\.gz$" downloadname in - let contype = "text/xml" in - (* Fill cache if needed and return a short circuit file. - Short circuit is needed in situations like: - resource type = normal, cache type = gzipped, required encoding = normal - we would like to avoid normal -> gzipped -> normal conversions. To avoid - this tmp_short_circuit is used to remember the name of the intermediate - file name *) - let fill_cache () = - threadSafe#doWriter (lazy( - if not (is_in_cache basename) then begin (* cache MISS *) - debug_print "Cache MISS :-("; - mkdir ~parents:true (Filename.dirname downloadname); - match (resource_type, Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> - wget ~output:downloadname url; - None - | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) - let tmp = tempfile () in - wget ~output:tmp url; - gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) - if enc = Enc_normal then (* user wants normal: don't delete it! *) - Some (tmp, enc) - else begin - Sys.remove tmp; - None - end - | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) - let tmp = tempfile () in - wget ~output:tmp url; - gunzip ~output:basename ~keep:true tmp; (* fill cache *) - if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *) - Some (tmp, enc) - else begin - Sys.remove tmp; - None - end - end else begin - debug_print "Cache HIT :-)"; - None - end - )) in - let tmp_short_circuit = fill_cache () in - threadSafe#doReader (lazy( - assert (is_in_cache basename); - match (enc, Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> - (* resource in cache is already in the required format *) - (match enc with - | Enc_normal -> - debug_print "No format mangling required (encoding = normal)"; - return_file ~fname:basename ~contype ~patch_fun outchan - | Enc_gzipped -> - debug_print "No format mangling required (encoding = gzipped)"; - return_file - ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" - ~patch_fun ~gunzip:true - outchan) - | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> - (match tmp_short_circuit with - | None -> (* no short circuit possible, use cache *) - debug_print "No short circuit available, use cache"; - let tmp = tempfile () in - (match enc with - | Enc_normal -> - (* required format is normal, cached version is gzipped *) - gunzip (* gunzip to tmp *) - ~output:tmp ~keep:true (basename ^ ".gz"); - return_file ~fname:tmp ~contype ~patch_fun outchan; - | Enc_gzipped -> - (* required format is gzipped, cached version is normal *) - gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) - return_file - ~fname:tmp ~contype ~contenc:"x-gzip" - ~patch_fun ~gunzip:true - outchan); - Sys.remove tmp - | Some (fname, Enc_normal) -> (* short circuit available, use it! *) - debug_print "Using short circuit (encoding = normal)"; - return_file ~fname ~contype ~patch_fun outchan; - Sys.remove fname - | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) - debug_print "Using short circuit (encoding = gzipped)"; - return_file - ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan; - Sys.remove fname) - )) -;; - - (* TODO enc is not yet supported *) -let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = - let patch_fun = - if patch then Http_getter_common.patch_xsl else (fun x -> x) - in - let fname = tempfile () in - wget ~output:fname url; - return_file ~fname ~contype:"text/xml" ~patch_fun outchan; - Sys.remove fname -;; - - (* TODO enc is not yet supported *) -let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan = - let patch_fun = - if patch then Http_getter_common.patch_dtd else (fun x -> x) - in - if Sys.file_exists url then - (* TODO check this: old getter here used text/xml *) - return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan - else - return_html_error ("Can't find DTD: " ^ url) outchan -;; - -let clean () = - let module E = Http_getter_env in - List.iter - (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")) - ) [E.cic_dir ; E.nuprl_dir ; E.rdf_dir ] -;; diff --git a/helm/http_getter/http_getter_cache.mli b/helm/http_getter/http_getter_cache.mli deleted file mode 100644 index 11211288a..000000000 --- a/helm/http_getter/http_getter_cache.mli +++ /dev/null @@ -1,44 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Http_getter_types;; - -val respond_xml: - ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> out_channel -> - unit - -val respond_xsl: - ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> - unit - -val respond_dtd: - ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> - unit - -val clean: unit -> unit - diff --git a/helm/http_getter/http_getter_common.ml b/helm/http_getter/http_getter_common.ml deleted file mode 100644 index 6087a467b..000000000 --- a/helm/http_getter/http_getter_common.ml +++ /dev/null @@ -1,137 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Http_getter_types;; -open Printf;; - -let string_of_ls_flag = function No -> "NO" | Yes -> "YES" | Ann -> "ANN" -let string_of_encoding = function - | Enc_normal -> "Normal" - | Enc_gzipped -> "GZipped" - -let is_cic_obj_uri uri = Pcre.pmatch ~pat:"^cic:" uri -let is_theory_uri uri = Pcre.pmatch ~pat:"^theory:" uri -let is_cic_uri uri = is_cic_obj_uri uri || is_theory_uri uri -let is_nuprl_uri uri = Pcre.pmatch ~pat:"^nuprl:" uri -let is_rdf_uri uri = Pcre.pmatch ~pat:"^helm:rdf(.*):(.*)//(.*)" uri -let is_xsl_uri uri = Pcre.pmatch ~pat:"^\\w+\\.xsl" uri - -let rec uri_of_string = function - | uri when is_rdf_uri uri -> - (match Pcre.split ~pat:"//" uri with - | [ prefix; uri ] -> - let rest = - match uri_of_string uri with - | Cic_uri xmluri -> xmluri - | _ -> raise (Invalid_URI uri) - in - Rdf_uri (prefix, rest) - | _ -> raise (Invalid_URI uri)) - | uri when is_cic_uri uri -> Cic_uri (Cic (Pcre.replace ~pat:"^cic:" uri)) - | uri when is_nuprl_uri uri -> Nuprl_uri (Pcre.replace ~pat:"^nuprl:" uri) - | uri when is_theory_uri uri -> - Cic_uri (Theory (Pcre.replace ~pat:"^theory:" uri)) - | uri -> raise (Invalid_URI uri) - -let patch_xml line = - Pcre.replace - ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" Http_getter_env.dtd_base_url) - ~templ:( - sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri=" Http_getter_env.my_own_url) - line -let patch_xsl = - let mk_patch_fun tag line = - Pcre.replace - ~pat:(sprintf "%s\\s+href=\"" tag) - ~templ:( - sprintf "%s href=\"%s/getxslt?uri=" tag Http_getter_env.my_own_url) - line - in - let (patch_import, patch_include) = - (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include") - in - fun line -> patch_include (patch_import line) -let patch_dtd line = - Pcre.replace - ~pat:"ENTITY (.*) SYSTEM\\s+\"" - ~templ:( - sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri=" Http_getter_env.my_own_url) - line - -let pp_error s = - sprintf "Http Getter error: %s" s -let pp_internal_error s = - sprintf "Http Getter Internal error: %s" s -let pp_msg s = sprintf "%s" s -let null_pp s = s - -let mk_return_fun pp_fun contype msg outchan = - Http_daemon.respond - ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan - -let return_html_error = mk_return_fun pp_error "text/html" -let return_html_internal_error = mk_return_fun pp_internal_error "text/html" -let return_html_msg = mk_return_fun pp_msg "text/html" -let return_html_raw = mk_return_fun null_pp "text/html" -let return_xml_raw = mk_return_fun null_pp "text/xml" -let return_file - ~fname ?contype ?contenc ?(patch_fun = fun x -> x) ?(gunzip = false) outchan - = - let headers = - match (contype, contenc) with - | (Some t, Some e) -> ["Content-Encoding", e; "Content-Type", t] - | (Some t, None) -> ["Content-Type" , t] - | (None, Some e) -> ["Content-Encoding", e] - | (None, None) -> [] - in - Http_daemon.send_basic_headers ~code:200 outchan; - Http_daemon.send_headers headers outchan; - Http_daemon.send_CRLF outchan; - if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to - it, compress the result and sent it to client *) - let (tmp1, tmp2) = - (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ()) - in - Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname; (* gunzip to tmp1 *) - let new_file = open_out tmp2 in - Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *) - (fun line -> output_string new_file (patch_fun line ^ "\n")) - tmp1; - close_out new_file; - Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *) - Http_getter_misc.iter_file (* send tmp1 to client as is*) - (fun line -> output_string outchan (line ^ "\n")) - tmp1; - Sys.remove tmp1 (* rm tmp1 *) - end else (* no need to gunzip, apply patch_fun directly to file *) - Http_getter_misc.iter_file - (fun line -> output_string outchan (patch_fun line ^ "\n")) - fname -;; -let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan - diff --git a/helm/http_getter/http_getter_common.mli b/helm/http_getter/http_getter_common.mli deleted file mode 100644 index f4ecb3dc8..000000000 --- a/helm/http_getter/http_getter_common.mli +++ /dev/null @@ -1,73 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Http_getter_types;; - -val string_of_ls_flag: ls_flag -> string -val string_of_encoding: encoding -> string - -val is_cic_uri: string -> bool -val is_nuprl_uri: string -> bool -val is_rdf_uri: string -> bool -val is_xsl_uri: string -> bool - -val uri_of_string: string -> uri - -val patch_xml : string -> string -val patch_xsl : string -> string -val patch_dtd : string -> string - -val return_html_error: string -> out_channel -> unit -val return_html_internal_error: string -> out_channel -> unit - (** return an HTML HTTP response from the given string, embedding it in an - "H1" element of an HTML page; content-type is set to text/html *) -val return_html_msg: string -> out_channel -> unit - (** return an HTTP response using given string as content; content-type is set - to text/html *) -val return_html_raw: string -> out_channel -> unit - (** return an HTTP response using given string as content; content-type is set - to text/xml *) -val return_xml_raw: string -> out_channel -> unit - (** return a bad request http response *) -val return_400: string -> out_channel -> unit - (** - @param fname name of the file to be sent - @param contype Content-Type header value - @param contenc Content-Enconding header value - @param patch_fun function used to patch file contents - @param gunzip is meaningful only if a patch function is provided. If gunzip - is true patch_fun is applied to the uncompressed version of the file. The file - is then compressed again and send to client - @param outchan output channel over which sent file fname *) -val return_file: - fname:string -> - ?contype:string -> ?contenc:string -> - ?patch_fun:(string -> string) -> ?gunzip:bool -> - out_channel -> - unit - diff --git a/helm/http_getter/http_getter_const.ml b/helm/http_getter/http_getter_const.ml deleted file mode 100644 index a4eac83e5..000000000 --- a/helm/http_getter/http_getter_const.ml +++ /dev/null @@ -1,111 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Printf;; - -let version = "0.3.0" -let conffile = "http_getter.conf.xml" - - (* TODO provide a better usage string *) -let usage_string configuration = - sprintf -" - - - HTTP Getter's help message - - -

HTTP Getter, version %s

-

Usage information

-

- Usage: http://hostname:getterport/command -

-

- Available commands: -

-

- help
- display this help message -

-

- getxml?uri=URI[&format=(normal|gz)][&patch_dtd=(yes|no)]
-

-

- register?uri=URI&url=URL
-

-

- resolve?uri=URI
-

-

- getdtd?uri=URI[&patch_dtd=(yes|no)]
-

-

- getxslt?uri=URI[&patch_dtd=(yes|no)]
-

-

- list_servers
-

-

- add_server?url=URL&position=POSITION
-

-

- remove_server?position=POSITION
-

-

- update
-

-

- clean_cache
-

-

- getalluris
-

-

- getallrdfuris
-

-

- ls?baseuri=URI&format=(txt|xml)
-

-

- getempty
-

-

Current configuration

-
%s
- - -" - version configuration - -let empty_xml = -" - -]> - -" - diff --git a/helm/http_getter/http_getter_const.mli b/helm/http_getter/http_getter_const.mli deleted file mode 100644 index 894ccd645..000000000 --- a/helm/http_getter/http_getter_const.mli +++ /dev/null @@ -1,36 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -val version: string -val conffile: string -val empty_xml: string - - (** @return an HTML usage string including configuration information passed as - input parameter *) -val usage_string: string -> string - diff --git a/helm/http_getter/http_getter_debugger.ml b/helm/http_getter/http_getter_debugger.ml deleted file mode 100644 index 3f9afd78c..000000000 --- a/helm/http_getter/http_getter_debugger.ml +++ /dev/null @@ -1,58 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -let debug = ref true - -(* invariant: if logfile is set, then logchan is set too *) -let logfile = ref None -let logchan = ref None - -let set_logfile f = - (match !logchan with None -> () | Some oc -> close_out oc); - match f with - | Some f -> - logfile := Some f; - logchan := Some (open_out f) - | None -> - logfile := None; - logchan := None - -let get_logfile () = !logfile - -let close_logfile () = set_logfile None - -let debug_print s = - let msg = "[HTTP-Getter] " ^ s in - if !debug then - match (!logfile, !logchan) with - | None, _ -> prerr_endline msg - | Some fname, Some oc -> - output_string oc msg; - flush oc - | Some _, None -> assert false - diff --git a/helm/http_getter/http_getter_debugger.mli b/helm/http_getter/http_getter_debugger.mli deleted file mode 100644 index 461e2a1a7..000000000 --- a/helm/http_getter/http_getter_debugger.mli +++ /dev/null @@ -1,40 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - - (** enable/disable debugging messages *) -val debug: bool ref - - (** output a debugging message *) -val debug_print: string -> unit - - (** if set to Some fname, fname will be used as a logfile, otherwise stderr - * will be used *) -val get_logfile: unit -> string option -val set_logfile: string option -> unit -val close_logfile: unit -> unit - diff --git a/helm/http_getter/http_getter_env.ml b/helm/http_getter/http_getter_env.ml deleted file mode 100644 index a7ab80f24..000000000 --- a/helm/http_getter/http_getter_env.ml +++ /dev/null @@ -1,193 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Printf -open Pxp_document -open Pxp_types -open Pxp_yacc - -open Http_getter_types - -let version = Http_getter_const.version - -type setting_src = - | Environment (* read setting from environment variables *) - | Conffile (* read setting from configuration file *) - | Both (* read setting from both; environment override config file *) - -let conf_file_tree = ref None - -let (conf_file, conf_dir) = - try - let conf_dir = - Pcre.replace ~pat:"/$" (Sys.getenv "HELM_CONFIGURATION_DIR") - in - (conf_dir ^ "/" ^ Http_getter_const.conffile, conf_dir) - with Not_found -> failwith "HELM_CONFIGURATION_DIR undefined" - -let safe_getenv ?(from = Both) var = - (let rec read_from_file () = - (match !conf_file_tree with - | None -> - conf_file_tree := - Some - (parse_wfcontent_entity - default_config (from_file conf_file) default_spec); - read_from_file () - | Some t -> - (try - Some (find_element (String.lowercase var) t)#data - with Not_found -> None)) - in - let read_from_env () = try Some (Sys.getenv var) with Not_found -> None in - let return_value name = function - | Some v -> v - | None -> failwith ("Setting " ^ name ^ " is not defined") - in - (match from with - | Environment -> return_value var (read_from_env ()) - | Conffile -> return_value var (read_from_file ()) - | Both -> - (match read_from_env () with - | None -> return_value var (read_from_file ()) - | v -> return_value var v))) - -let servers_file = safe_getenv "HTTP_GETTER_SERVERS_FILE" - -let load_servers () = - let pos = ref (-1) in - List.rev (Http_getter_misc.fold_file - (fun line servers -> - if Http_getter_misc.is_blank_line line then - servers - else - (incr pos; (!pos, line) :: servers)) - [] - servers_file) - -let _servers = ref (load_servers ()) -let servers () = !_servers - -let save_servers () = - let oc = open_out servers_file in - List.iter (fun (_,server) -> output_string oc (server ^ "\n")) (servers ()); - close_out oc -let reload_servers () = _servers := load_servers () - -let cic_dbm = safe_getenv "HTTP_GETTER_CIC_DBM" -let nuprl_dbm = safe_getenv "HTTP_GETTER_NUPRL_DBM" -let rdf_dbm = safe_getenv "HTTP_GETTER_RDF_DBM" -let xsl_dbm = safe_getenv "HTTP_GETTER_XSL_DBM" -let xml_index = safe_getenv "HTTP_GETTER_XML_INDEXNAME" -let rdf_index = safe_getenv "HTTP_GETTER_RDF_INDEXNAME" -let xsl_index = safe_getenv "HTTP_GETTER_XSL_INDEXNAME" -let cic_dir = safe_getenv "HTTP_GETTER_CIC_DIR" -let nuprl_dir = safe_getenv "HTTP_GETTER_NUPRL_DIR" -let rdf_dir = safe_getenv "HTTP_GETTER_RDF_DIR" -let dtd_dir = safe_getenv "HTTP_GETTER_DTD_DIR" - -let port = - let port = safe_getenv "HTTP_GETTER_PORT" in - try - int_of_string port - with Failure "int_of_string" -> - failwith ("Invalid port value: " ^ port) -let host = - let buf = Buffer.create 20 in - Shell.call ~stdout:(Shell.to_buffer buf) [Shell.cmd "hostname" ["-f"]]; - Pcre.replace ~pat:"\n+$" (Buffer.contents buf) -let my_own_url = - sprintf - "http://%s%s" (* without trailing '/' *) - host - (if port = 80 then "" else (sprintf ":%d" port)) -let dtd_base_url = safe_getenv "HTTP_GETTER_DTD_BASE_URL" - -let cache_mode = - match String.lowercase (safe_getenv "HTTP_GETTER_CACHE_MODE") with - | "normal" -> Enc_normal - | "gz" -> Enc_gzipped - | mode -> failwith ("Invalid cache mode: " ^ mode) - -let reload () = reload_servers () - -let env_to_string () = - sprintf -"HTTP Getter %s (the OCaml one!) - -cic_dbm:\t%s -nuprl_dbm:\t%s -rdf_dbm:\t%s -xsl_dbm:\t%s -xml_index:\t%s -rdf_index:\t%s -xsl_index:\t%s -cic_dir:\t%s -nuprl_dir:\t%s -rdf_dir:\t%s -dtd_dir:\t%s -servers_file:\t%s -host:\t\t%s -port:\t\t%d -my_own_url:\t%s -dtd_base_url:\t%s -cache_mode:\t%s -conf_file:\t%s -conf_dir:\t%s -servers: -\t%s -" - version cic_dbm nuprl_dbm rdf_dbm xsl_dbm xml_index rdf_index xsl_index - cic_dir nuprl_dir rdf_dir dtd_dir servers_file host port my_own_url - dtd_base_url - (match cache_mode with Enc_normal -> "Normal" | Enc_gzipped -> "GZipped") - conf_file conf_dir - (String.concat "\n\t" (* (position * server) list *) - (List.map (fun (pos, server) -> sprintf "%3d: %s" pos server) - (servers ()))) - -let add_server ?position url = - (match position with - | None -> - _servers := !_servers @ [-1, url]; - | Some p when p > 0 -> - let rec add_after pos = function - | [] -> [-1, url] - | hd :: tl when p = 1 -> hd :: (-1, url) :: tl - | hd :: tl (* when p > 1 *) -> hd :: (add_after (pos - 1) tl) - in - _servers := add_after p !_servers - | Some _ -> assert false); - save_servers (); - reload_servers () - -let remove_server position = - _servers := List.remove_assoc position !_servers; - save_servers (); - reload_servers () - diff --git a/helm/http_getter/http_getter_env.mli b/helm/http_getter/http_getter_env.mli deleted file mode 100644 index fe660d849..000000000 --- a/helm/http_getter/http_getter_env.mli +++ /dev/null @@ -1,77 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Http_getter_types;; - - (* {2 general information} *) - -val version : string (* getter version *) - - (* {2 environment gathered data} *) - -val cic_dbm : string (* XML map DBM file for CIC *) -val nuprl_dbm : string (* XML map DBM file for NuPRL *) -val rdf_dbm : string (* RDF map DBM file *) -val xsl_dbm : string (* XSL map DBM file *) -val xml_index : string (* XMLs' index *) -val rdf_index : string (* RDFs' index *) -val xsl_index : string (* XSLTs' index *) -val cic_dir : string (* XMLs' directory for CIC*) -val nuprl_dir : string (* XMLs' directory for NuPRL*) -val rdf_dir : string (* RDFs' directory *) -val dtd_dir : string (* DTDs' root directory *) -val servers_file : string (* servers.txt file *) -val port : int (* port on which getter listens *) -val dtd_base_url : string (* base URL for DTD downloading *) - - (* {2 derived data} *) - -val host : string (* host on which getter listens *) -val my_own_url : string (* URL at which contact getter *) -val servers : unit -> (int * string) list - (* (position * server) list *) -val cache_mode : encoding (* cached files encoding *) -val conf_file : string (* configuration file's full path *) -val conf_dir : string (* directory where conf_file resides *) - - (* {2 dynamic configuration changes} *) - - (* add a server to servers list in a given position (defaults to "after the - last server", change servers file accordingly and reload servers list *) -val add_server: ?position:int -> string -> unit - (* remove a server from servers list, change servers file accordingly and - reload servers list *) -val remove_server: int -> unit - - (* {2 misc} *) - -val reload: unit -> unit (* reload configuration information *) -val env_to_string : unit -> string (* dump a textual representation of the - current http_getter settings on an output - channel *) - diff --git a/helm/http_getter/http_getter_map.ml b/helm/http_getter/http_getter_map.ml deleted file mode 100644 index 57ec92736..000000000 --- a/helm/http_getter/http_getter_map.ml +++ /dev/null @@ -1,96 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -exception Key_already_in of string;; -exception Key_not_found of string;; - -class map dbname = - let perm = 420 in (* permission 644 in decimal notation *) - let open_dbm () = Dbm.opendbm dbname [ Dbm.Dbm_rdwr; Dbm.Dbm_create ] perm in - - object (self) - - inherit ThreadSafe.threadSafe - - val mutable db = open_dbm () - - (*initializer Gc.finalise Dbm.close db (* close db on GC *)*) - - method add key value = - self#doWriter (lazy ( - try - Dbm.add db key value - with Dbm.Dbm_error "Entry already exists" -> raise (Key_already_in key) - )) - - method replace key value = - self#doWriter (lazy ( - Dbm.replace db key value - )) - - method remove key = - self#doWriter (lazy ( - try - Dbm.remove db key - with Dbm.Dbm_error "dbm_delete" -> raise (Key_not_found key) - )) - - method resolve key = - self#doReader (lazy ( - try Dbm.find db key with Not_found -> raise (Key_not_found key) - )) - - method iter (f: string -> string -> unit) = - self#doReader (lazy ( - Dbm.iter f db - )) - - method sync = - self#doWriter (lazy ( - Dbm.close db; - db <- open_dbm () - )) - - method clear = - self#doWriter (lazy ( - Dbm.close db; - List.iter - (fun ext -> - let file = dbname ^ ext in - if Sys.file_exists file then Sys.remove file) - [".dir"; ".pag"; ".db"]; - db <- open_dbm () - )) - - method close = - self#doWriter (lazy ( - Dbm.close db - )) - - end - diff --git a/helm/http_getter/http_getter_map.mli b/helm/http_getter/http_getter_map.mli deleted file mode 100644 index 7081f1962..000000000 --- a/helm/http_getter/http_getter_map.mli +++ /dev/null @@ -1,44 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -exception Key_already_in of string -exception Key_not_found of string - -class map: - string -> - object - method add: string -> string -> unit - method replace: string -> string -> unit - method remove: string -> unit - method resolve: string -> string - method iter: (string -> string -> unit) -> unit - method sync: unit - method clear: unit - - method close: unit (* use with caution! *) - end diff --git a/helm/http_getter/http_getter_misc.ml b/helm/http_getter/http_getter_misc.ml deleted file mode 100644 index c983c2988..000000000 --- a/helm/http_getter/http_getter_misc.ml +++ /dev/null @@ -1,250 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Printf - -open Http_getter_debugger - -let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *) -let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$" -let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://" -let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://" -let dir_sep_RE = Pcre.regexp "/" -let heading_slash_RE = Pcre.regexp "^/" - -let bufsiz = 16384 (* for file system I/O *) -let tcp_bufsiz = 4096 (* for TCP I/O *) - -let fold_file f init fname = - let ic = open_in fname in - let rec aux acc = - let line = try Some (input_line ic) with End_of_file -> None in - match line with - | None -> acc - | Some line -> aux (f line acc) - in - let res = try aux init with e -> close_in ic; raise e in - close_in ic; - res - -let iter_file f = fold_file (fun line _ -> f line) () - -let hashtbl_sorted_fold f tbl init = - let sorted_keys = - List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) - in - List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys - -let hashtbl_sorted_iter f tbl = - let sorted_keys = - List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) - in - List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys - -let cp src dst = - let (ic, oc) = (open_in src, open_out dst) in - let buf = String.create bufsiz in - (try - while true do - let bytes = input ic buf 0 bufsiz in - if bytes = 0 then raise End_of_file else output oc buf 0 bytes - done - with End_of_file -> ()); - close_in ic; close_out oc - -let parse_url url = - try - let subs = - Pcre.extract ~rex:url_RE (Pcre.replace ~rex:http_scheme_RE url) - in - (subs.(1), - (if subs.(2) = "" then 80 else int_of_string subs.(3)), - (if subs.(4) = "" then "/" else subs.(4))) - with exc -> - failwith - (sprintf "Can't parse url: %s (exception: %s)" - url (Printexc.to_string exc)) -let init_socket addr port = - let inet_addr = (Unix.gethostbyname addr).Unix.h_addr_list.(0) in - let sockaddr = Unix.ADDR_INET (inet_addr, port) in - let suck = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in - Unix.connect suck sockaddr; - let outchan = Unix.out_channel_of_descr suck in - let inchan = Unix.in_channel_of_descr suck in - (inchan, outchan) -let http_get_iter_buf ~callback url = - let (address, port, path) = parse_url url in - let buf = String.create tcp_bufsiz in - let (inchan, outchan) = init_socket address port in - output_string outchan (sprintf "GET %s\r\n" path); - flush outchan; - (try - while true do - match input inchan buf 0 tcp_bufsiz with - | 0 -> raise End_of_file - | bytes when bytes = tcp_bufsiz -> (* buffer full, no need to slice it *) - callback buf - | bytes when bytes < tcp_bufsiz -> (* buffer not full, slice it *) - callback (String.sub buf 0 bytes) - | _ -> (* ( bytes < 0 ) || ( bytes > tcp_bufsiz ) *) - assert false - done - with End_of_file -> ()); - close_in inchan (* close also outchan, same fd *) - -let wget ?output url = - debug_print - (sprintf "wgetting %s (output: %s)" url - (match output with None -> "default" | Some f -> f)); - match url with - | url when Pcre.pmatch ~rex:file_scheme_RE url -> (* file:// *) - (let src_fname = Pcre.replace ~rex:file_scheme_RE url in - match output with - | Some dst_fname -> cp src_fname dst_fname - | None -> - let dst_fname = Filename.basename src_fname in - if src_fname <> dst_fname then - cp src_fname dst_fname - else (* src and dst are the same: do nothing *) - ()) - | url when Pcre.pmatch ~rex:http_scheme_RE url -> (* http:// *) - (let oc = - open_out (match output with Some f -> f | None -> Filename.basename url) - in - http_get_iter_buf ~callback:(fun data -> output_string oc data) url; - close_out oc) - | scheme -> (* unsupported scheme *) - failwith ("Http_getter_misc.wget: unsupported scheme: " ^ scheme) - -let gzip ?(keep = false) ?output fname = - let output = match output with None -> fname ^ ".gz" | Some fname -> fname in - debug_print (sprintf "gzipping %s (keep: %b, output: %s)" fname keep output); - let (ic, oc) = (open_in fname, Gzip.open_out output) in - let buf = String.create bufsiz in - (try - while true do - let bytes = input ic buf 0 bufsiz in - if bytes = 0 then raise End_of_file else Gzip.output oc buf 0 bytes - done - with End_of_file -> ()); - close_in ic; Gzip.close_out oc; - if not keep then Sys.remove fname -;; - -let gunzip ?(keep = false) ?output fname = - (* assumption: given file name ends with ".gz" or output is set *) - let output = - match output with - | None -> - if (Pcre.pmatch ~rex:trailing_dot_gz_RE fname) then - Pcre.replace ~rex:trailing_dot_gz_RE fname - else - failwith - "Http_getter_misc.gunzip: unable to determine output file name" - | Some fname -> fname - in - debug_print (sprintf "gunzipping %s (keep: %b, output: %s)" - fname keep output); - let (ic, oc) = (Gzip.open_in fname, open_out output) in - let buf = String.create bufsiz in - (try - while true do - let bytes = Gzip.input ic buf 0 bufsiz in - if bytes = 0 then raise End_of_file else Pervasives.output oc buf 0 bytes - done - with End_of_file -> ()); - Gzip.close_in ic; close_out oc; - if not keep then Sys.remove fname -;; - -let tempfile () = Filename.temp_file "http_getter_" "" - -exception Mkdir_failure of string * string;; (* dirname, failure reason *) -let dir_perm = 0o755 - -let mkdir ?(parents = false) dirname = - let mkdirhier () = - let (pieces, hd) = - let split = Pcre.split ~rex:dir_sep_RE dirname in - if Pcre.pmatch ~rex:heading_slash_RE dirname then - (List.tl split, "/") - else - (split, "") - in - ignore - (List.fold_left - (fun pre dir -> - let next_dir = - sprintf "%s%s%s" pre (match pre with "/" | "" -> "" | _ -> "/") dir - in - (try - (match (Unix.stat next_dir).Unix.st_kind with - | Unix.S_DIR -> () (* dir component already exists, go on! *) - | _ -> (* dir component already exists but isn't a dir, abort! *) - raise - (Mkdir_failure (dirname, - sprintf "'%s' already exists but is not a dir" next_dir))) - with Unix.Unix_error (Unix.ENOENT, "stat", _) -> - (* dir component doesn't exists, create it and go on! *) - Unix.mkdir next_dir dir_perm); - next_dir) - hd pieces) - in - if parents then mkdirhier () else Unix.mkdir dirname dir_perm - -let string_of_proc_status = function - | Unix.WEXITED code -> sprintf "[Exited: %d]" code - | Unix.WSIGNALED sg -> sprintf "[Killed: %d]" sg - | Unix.WSTOPPED sg -> sprintf "[Stopped: %d]" sg - -let http_get url = - if Pcre.pmatch ~rex:file_scheme_RE url then begin - (* file:// URL. Read data from file system *) - let fname = Pcre.replace ~rex:file_scheme_RE url in - try - let size = (Unix.stat fname).Unix.st_size in - let buf = String.create size in - let ic = open_in fname in - really_input ic buf 0 size; - close_in ic; - Some buf - with Unix.Unix_error (Unix.ENOENT, "stat", _) -> None - end else (* other URL, pass it to Http_client *) - try - Some (Http_client.http_get url) - with e -> - prerr_endline (sprintf - "Warning: Http_client failed on url %s with exception: %s" - url (Printexc.to_string e)); - None - -let is_blank_line = - let blank_line_RE = Pcre.regexp "(^#)|(^\\s*$)" in - fun line -> - Pcre.pmatch ~rex:blank_line_RE line - diff --git a/helm/http_getter/http_getter_misc.mli b/helm/http_getter/http_getter_misc.mli deleted file mode 100644 index b328742be..000000000 --- a/helm/http_getter/http_getter_misc.mli +++ /dev/null @@ -1,80 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - - (** 'mkdir' failed, arguments are: name of the directory to be created and - failure reason *) -exception Mkdir_failure of string * string - - (** "fold_left" like function on file lines, trailing newline is not passed to - the given function *) -val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a - (* "iter" like function on file lines, trailing newline is not passed to the - given function *) -val iter_file : (string -> unit) -> string -> unit - - (** like Hashtbl.fold but keys are processed ordered *) -val hashtbl_sorted_fold : - ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c - (** like Hashtbl.iter but keys are processed ordered *) -val hashtbl_sorted_iter : ('a -> 'b -> unit) -> ('a, 'b) Hashtbl.t -> unit - - (** cp frontend *) -val cp: string -> string -> unit - (** wget frontend, if output is given it is the destination file, otherwise - standard wget rules are used. Additionally this function support also the - "file://" scheme for file system addressing *) -val wget: ?output: string -> string -> unit - (** gzip frontend. If keep = true original file will be kept, default is - false. output is the file on which gzipped data will be saved, default is - given file with an added ".gz" suffix *) -val gzip: ?keep: bool -> ?output: string -> string -> unit - (** gunzip frontend. If keep = true original file will be kept, default is - false. output is the file on which gunzipped data will be saved, default is - given file name without trailing ".gz" *) -val gunzip: ?keep: bool -> ?output: string -> string -> unit - (** tempfile frontend, return the name of created file. A special purpose - suffix is used (actually "_http_getter" *) -val tempfile: unit -> string - (** mkdir frontend, if parents = true also parent directories will be created. - If the given directory already exists doesn't act *) -val mkdir: ?parents: bool -> string -> unit - - (** pretty printer for Unix.process_status values *) -val string_of_proc_status : Unix.process_status -> string - - (** raw HTTP downloader, return Some the contents of downloaded resource or - None if an error occured while downloading. This function support also - "file://" scheme for filesystem resources *) -val http_get: string -> string option - (** 'iter' like method that iter over string slices (unspecified length) of a - remote resources fetched via HTTP GET requests *) -val http_get_iter_buf: callback:(string -> unit) -> string -> unit - - (** true on blanks-only and #-commented lines, false otherwise *) -val is_blank_line: string -> bool - diff --git a/helm/http_getter/http_getter_types.ml b/helm/http_getter/http_getter_types.ml deleted file mode 100644 index bf584f6ce..000000000 --- a/helm/http_getter/http_getter_types.ml +++ /dev/null @@ -1,62 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -exception Bad_request of string -exception Unresolvable_URI of string -exception Invalid_URI of string -exception Invalid_URL of string -exception Invalid_RDF_class of string -exception Internal_error of string - -type encoding = Enc_normal | Enc_gzipped -type answer_format = Fmt_text | Fmt_xml -type ls_flag = Yes | No | Ann -type ls_object = - { - uri: string; - ann: bool; - types: ls_flag; - body: ls_flag; - proof_tree: ls_flag; - } -type ls_item = - | Ls_section of string - | Ls_object of ls_object - -type xml_uri = - | Cic of string - | Theory of string -type rdf_uri = string * xml_uri -type nuprl_uri = string -type uri = - | Cic_uri of xml_uri - | Nuprl_uri of nuprl_uri - | Rdf_uri of rdf_uri - -module StringSet = Set.Make (String) - diff --git a/helm/http_getter/main.ml b/helm/http_getter/main.ml index e6b9f6e02..90f776df3 100644 --- a/helm/http_getter/main.ml +++ b/helm/http_getter/main.ml @@ -35,6 +35,8 @@ open Http_getter_debugger (* constants *) +let configuration_file = "http_getter.conf.xml" + let common_headers = [ "Cache-Control", "no-cache"; "Pragma", "no-cache"; @@ -122,7 +124,7 @@ let return_all_foo_uris doctype uris outchan = <%s> " doctype - Http_getter_env.my_own_url + (Lazy.force Http_getter_env.my_own_url) doctype doctype); List.iter @@ -153,7 +155,7 @@ let return_ls xmluri fmt outchan = | Fmt_xml -> Buffer.add_string buf "\n"; bprintf buf "\n" - Http_getter_env.my_own_url; + (Lazy.force Http_getter_env.my_own_url); Buffer.add_string buf "\n"; List.iter (function @@ -216,7 +218,9 @@ let callback (req: Http_types.request) outchan = ~patch:(parse_patch req) outchan | "/getdtd" -> Http_getter_cache.respond_dtd ~patch:(parse_patch req) - ~url:(Http_getter_env.dtd_dir ^ "/" ^ (req#param "uri")) outchan + ~url:(sprintf "%s/%s" + (Helm_registry.get "getter.dtd_dir") (req#param "uri")) + outchan | "/resolve" -> return_resolve (req#param "uri") outchan | "/register" -> Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url"); @@ -227,7 +231,7 @@ let callback (req: Http_types.request) outchan = | "/update" -> Http_getter_env.reload (); (* reload servers list from servers file *) let log = Http_getter.update () in - return_html_msg (Ui_logger.html_of_html_msg log) outchan + return_html_msg (HelmLogger.html_of_html_msg log) outchan | "/list_servers" -> return_list_servers outchan | "/add_server" -> let name = req#param "url" in @@ -235,7 +239,7 @@ let callback (req: Http_types.request) outchan = let log = Http_getter.add_server ~position name in return_html_msg (sprintf "Added server %s in position %d)
\n%s" - name position (Ui_logger.html_of_html_msg log)) + name position (HelmLogger.html_of_html_msg log)) outchan | "/remove_server" -> let position = parse_position req in @@ -247,7 +251,7 @@ let callback (req: Http_types.request) outchan = in return_html_msg (sprintf "Removed server at position %d
\n%s" - position (Ui_logger.html_of_html_msg log)) + position (HelmLogger.html_of_html_msg log)) outchan | "/getalluris" -> return_all_xml_uris outchan | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan @@ -279,13 +283,16 @@ let callback (req: Http_types.request) outchan = (* Main *) let main () = + Helm_registry.load_from configuration_file; + Http_getter_env.reload (); print_string (Http_getter_env.env_to_string ()); flush stdout; at_exit Http_getter.close_maps; Sys.catch_break true; try - Http_daemon.start' - ~timeout:(Some 600) ~port:Http_getter_env.port ~mode:`Thread callback + Http_daemon.start' ~mode:`Thread + ~timeout:(Some 600) ~port:(Helm_registry.get_int "getter.port") + callback with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) let _ = main () diff --git a/helm/http_getter/threadSafe.ml b/helm/http_getter/threadSafe.ml deleted file mode 100644 index 164b5b477..000000000 --- a/helm/http_getter/threadSafe.ml +++ /dev/null @@ -1,98 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -open Http_getter_debugger;; -let debug_print _ = ();; (* override debugger 'debug_print' *) - -class threadSafe = - object (self) - - val mutex = Mutex.create () - - (** condition variable: 'no readers is currently reading' *) - val noReaders = Condition.create () - - (** readers count *) - val mutable readersCount = 0 - - method private incrReadersCount = (* internal, not exported *) - self#doCritical (lazy ( - readersCount <- readersCount + 1 - )) - - method private decrReadersCount = (* internal, not exported *) - self#doCritical (lazy ( - if readersCount > 0 then readersCount <- readersCount - 1; - )) - - method private signalNoReaders = (* internal, not exported *) - self#doCritical (lazy ( - if readersCount = 0 then Condition.signal noReaders - )) - - method private doCritical: 'a. 'a lazy_t -> 'a = - fun action -> - debug_print ""; - (try - Mutex.lock mutex; - let res = Lazy.force action in - Mutex.unlock mutex; - debug_print ""; - res - with e -> - Mutex.unlock mutex; - raise e); - - method private doReader: 'a. 'a lazy_t -> 'a = - fun action -> - debug_print ""; - let cleanup () = - self#decrReadersCount; - self#signalNoReaders - in - self#incrReadersCount; - let res = (try Lazy.force action with e -> (cleanup (); raise e)) in - cleanup (); - debug_print ""; - res - - (* TODO may starve!!!! is what we want or not? *) - method private doWriter: 'a. 'a lazy_t -> 'a = - fun action -> - debug_print ""; - self#doCritical (lazy ( - while readersCount > 0 do - Condition.wait noReaders mutex - done; - let res = Lazy.force action in - debug_print ""; - res - )) - - end - diff --git a/helm/http_getter/threadSafe.mli b/helm/http_getter/threadSafe.mli deleted file mode 100644 index 0023c89e6..000000000 --- a/helm/http_getter/threadSafe.mli +++ /dev/null @@ -1,44 +0,0 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ - * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ - *) - -class threadSafe: - object - - (** execute 'action' in mutual exclusion between all other threads *) - method private doCritical: 'a lazy_t -> 'a - - (** execute 'action' acting as a 'reader' i.e.: multiple readers can act - at the same time but no writer can act until no readers are acting *) - method private doReader: 'a lazy_t -> 'a - - (** execute 'action' acting as a 'writer' i.e.: when a writer is acting, - no readers or writer can act, beware that writers can starve *) - method private doWriter: 'a lazy_t -> 'a - - end - -- 2.39.2