+++ /dev/null
-http_getter_cache.cmo: http_getter_common.cmi http_getter_debugger.cmi \
- http_getter_env.cmi http_getter_misc.cmi http_getter_types.cmo \
- threadSafe.cmi http_getter_cache.cmi
-http_getter_cache.cmx: http_getter_common.cmx http_getter_debugger.cmx \
- http_getter_env.cmx http_getter_misc.cmx http_getter_types.cmx \
- threadSafe.cmx http_getter_cache.cmi
-http_getter_common.cmo: http_getter_env.cmi http_getter_misc.cmi \
- http_getter_types.cmo http_getter_common.cmi
-http_getter_common.cmx: http_getter_env.cmx http_getter_misc.cmx \
- http_getter_types.cmx http_getter_common.cmi
-http_getter_const.cmo: http_getter_const.cmi
-http_getter_const.cmx: http_getter_const.cmi
-http_getter_debugger.cmo: http_getter_debugger.cmi
-http_getter_debugger.cmx: http_getter_debugger.cmi
-http_getter_env.cmo: http_getter_const.cmi http_getter_misc.cmi \
- http_getter_types.cmo http_getter_env.cmi
-http_getter_env.cmx: http_getter_const.cmx http_getter_misc.cmx \
- http_getter_types.cmx http_getter_env.cmi
-http_getter_map.cmo: threadSafe.cmi http_getter_map.cmi
-http_getter_map.cmx: threadSafe.cmx http_getter_map.cmi
-http_getter_misc.cmo: http_getter_debugger.cmi http_getter_misc.cmi
-http_getter_misc.cmx: http_getter_debugger.cmx http_getter_misc.cmi
-http_getter.cmo: http_getter.cmi http_getter_common.cmi http_getter_const.cmi \
- http_getter_env.cmi http_getter_map.cmi http_getter_types.cmo \
- http_getter.cmi
-http_getter.cmx: http_getter.cmx http_getter_common.cmx http_getter_const.cmx \
- http_getter_env.cmx http_getter_map.cmx http_getter_types.cmx \
- http_getter.cmi
-main.cmo: http_getter.cmi http_getter_cache.cmi http_getter_common.cmi \
- http_getter_const.cmi http_getter_debugger.cmi http_getter_env.cmi \
- http_getter_map.cmi http_getter_misc.cmi http_getter_types.cmo
-main.cmx: http_getter.cmx http_getter_cache.cmx http_getter_common.cmx \
- http_getter_const.cmx http_getter_debugger.cmx http_getter_env.cmx \
- http_getter_map.cmx http_getter_misc.cmx http_getter_types.cmx
-threadSafe.cmo: http_getter_debugger.cmi threadSafe.cmi
-threadSafe.cmx: http_getter_debugger.cmx threadSafe.cmi
-http_getter_cache.cmi: http_getter_types.cmo
-http_getter_common.cmi: http_getter_types.cmo
-http_getter_env.cmi: http_getter_types.cmo
-http_getter.cmi: http_getter_map.cmi
VERSION = 0.3.0
NAME = http_getter
-DISTDIR = http-getter-$(VERSION)
-EXTRA_DIST = AUTHORS COPYING NEWS README BUGS
-DOCS = doc/http_getter.conf.xml.sample
-
-REQUIRES = \
- http dbm pcre pxp shell threads zip \
- helm-logger
+REQUIRES = helm-getter helm-logger helm-registry
COMMONOPTS = -package "$(REQUIRES)" -pp camlp4o
OCAMLFIND = ocamlfind
-OCAMLC = $(OCAMLFIND) ocamlc -thread $(COMMONOPTS)
-OCAMLOPT = $(OCAMLFIND) ocamlopt -thread $(COMMONOPTS)
-OCAMLDEP = $(OCAMLFIND) ocamldep $(COMMONOPTS)
-OCAMLDOC = \
- ocamldoc \
- $(shell $(OCAMLFIND) query -i-format http) \
- $(shell $(OCAMLFIND) query -i-format dbm) \
- $(shell $(OCAMLFIND) query -i-format pcre) \
- $(shell $(OCAMLFIND) query -i-format pxp) \
- $(shell $(OCAMLFIND) query -i-format shell) \
- $(shell $(OCAMLFIND) query -i-format threads) \
- $(shell $(OCAMLFIND) query -i-format zip)
-MODULES = \
- http_getter_debugger threadSafe \
- http_getter_types http_getter_misc http_getter_const \
- http_getter_env http_getter_common http_getter_map \
- http_getter_cache http_getter
-
-OBJS = $(patsubst %,%.cmo,$(MODULES))
-OBJSOPT = $(patsubst %,%.cmx,$(MODULES))
+OCAMLC = $(OCAMLFIND) ocamlc -g $(COMMONOPTS)
+OCAMLOPT = $(OCAMLFIND) opt $(COMMONOPTS)
all: byte
byte: $(NAME)
opt: $(NAME).opt
world: byte opt
-include .depend
-depend:
- $(OCAMLDEP) *.ml *.mli > .depend
-
-%.cmi: %.mli
- $(OCAMLC) -c $<
-%.cmo: %.ml %.cmi
- $(OCAMLC) -c $<
-%.cmx: %.ml %.cmi
- $(OCAMLOPT) -c $<
-include Makefile.overrides
-$(NAME).cmo: $(NAME).ml
- $(OCAMLC) -c $<
-$(NAME).cmx: $(NAME).ml
- $(OCAMLOPT) -c $<
-$(NAME): $(OBJS) main.ml
+$(NAME): main.ml
$(OCAMLC) -linkpkg -thread -o $@ $^
-$(NAME).opt: $(OBJSOPT) main.ml
+$(NAME).opt: main.ml
$(OCAMLOPT) -linkpkg -thread -o $@ $^
-http_getter.dot: *.ml *.mli
- $(OCAMLDOC) -dot -o $@ $^
-
distclean: clean
clean:
rm -f *.cm[aiox] *.o $(NAME){,.opt} *.dot
-dist: distclean depend
- if [ -d $(DISTDIR) ]; then rm -rf $(DISTDIR); else true; fi
- mkdir $(DISTDIR)/
- mkdir $(DISTDIR)/doc
- cp $(DOCS) $(DISTDIR)/doc/
- for m in $(patsubst %, %.mli, $(MODULES)); do \
- if [ "$$m" != "http_getter_types.mli" ]; then \
- cp $$m $(DISTDIR)/; \
- fi; \
- done
- cp \
- $(patsubst %, %.ml, $(MODULES)) \
- $(NAME).ml \
- Makefile Makefile.overrides .depend \
- $(DISTDIR)/
- cp $(EXTRA_DIST) $(DISTDIR)/
- tar cvzf $(DISTDIR).tar.gz $(DISTDIR)/
- rm -rf $(DISTDIR)/
-distcheck: dist
- tar xvzf $(DISTDIR).tar.gz
- cd $(DISTDIR); make
- rm -rf $(DISTDIR)
-.PHONY: all byte dist distcheck opt world depend clean distclean
+.PHONY: all byte opt world clean distclean
+09/02/2004
+
+ Split getter in backend and frontend. Frontend is the only remaining file here
+ (namely main.ml). Backend is shipped in the helm-http_getter library (look in
+ the HELM cvs, dir ocaml/getter/).
+ -- Zack <zack@cs.unibo.it>
+
08/01/2003
Fully OCaml HTTP Getter is now available: go and burn the Perl implementation!
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Printf
-
-open Http_getter_common
-open Http_getter_misc
-open Http_getter_debugger
-open Http_getter_types
-
-let (index_line_sep_RE, index_sep_RE, trailing_types_RE,
- heading_cic_RE, heading_theory_RE, heading_nuprl_RE,
- heading_rdf_cic_RE, heading_rdf_theory_RE) =
- (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n",
- Pcre.regexp "\\.types$",
- Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:",
- Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:")
-
- (* global maps, shared by all threads *)
-
-let cic_map = new Http_getter_map.map Http_getter_env.cic_dbm
-let nuprl_map = new Http_getter_map.map Http_getter_env.nuprl_dbm
-let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm
-let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm
-
-let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ]
-let close_maps () = List.iter (fun m -> m#close) maps
-let clear_maps () = List.iter (fun m -> m#clear) maps
-let sync_maps () = List.iter (fun m -> m#sync) maps
-
-let map_of_uri = function
- | uri when is_cic_uri uri -> cic_map
- | uri when is_nuprl_uri uri -> nuprl_map
- | uri when is_rdf_uri uri -> rdf_map
- | uri when is_xsl_uri uri -> xsl_map
- | uri -> raise (Unresolvable_URI uri)
-
-let update_from_server logmsg server_url = (* use global maps *)
- debug_print ("Updating information from " ^ server_url);
- let xml_url_of_uri = function
- (* TODO missing sanity checks on server_url, e.g. it can contains $1 *)
- | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) ->
- Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri
- | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) ->
- Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri
- | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) ->
- Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri
- | uri -> raise (Invalid_URI uri)
- in
- let rdf_url_of_uri = function (* TODO as above *)
- | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) ->
- Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri
- | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) ->
- Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri
- | uri -> raise (Invalid_URI uri)
- in
- let log = ref (`T ("Processing server: " ^ server_url) :: logmsg) in
- let (xml_index, rdf_index, xsl_index) =
- (* TODO keeps index in memory, is better to keep them on temp files? *)
- (http_get (server_url ^ "/" ^ Http_getter_env.xml_index),
- http_get (server_url ^ "/" ^ Http_getter_env.rdf_index),
- http_get (server_url ^ "/" ^ Http_getter_env.xsl_index))
- in
- if (xml_index = None && rdf_index = None && xsl_index = None) then
- debug_print (sprintf "Warning: useless server %s" server_url);
- (match xml_index with
- | Some xml_index ->
- (log := `T "Updating XML db ...<br />" :: !log;
- List.iter
- (function
- | l when is_blank_line l -> () (* skip blank and commented lines *)
- | l ->
- (try
- (match Pcre.split ~rex:index_line_sep_RE l with
- | [uri; "gz"] ->
- assert (is_cic_uri uri || is_nuprl_uri uri) ;
- (map_of_uri uri)#replace
- uri ((xml_url_of_uri uri) ^ ".xml.gz")
- | [uri] ->
- assert (is_cic_uri uri || is_nuprl_uri uri) ;
- (map_of_uri uri)#replace
- uri ((xml_url_of_uri uri) ^ ".xml")
- | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log)
- with Invalid_URI uri ->
- log := `T ("Ignoring invalid XML URI: '" ^ l) :: !log))
- (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *)
- log := `T "All done" :: !log)
- | None -> ());
- (match rdf_index with
- | Some rdf_index ->
- (log := `T "Updating RDF db ..." :: !log;
- List.iter
- (fun l ->
- try
- (match Pcre.split ~rex:index_line_sep_RE l with
- | [uri; "gz"] ->
- rdf_map#replace uri
- ((rdf_url_of_uri uri) ^ ".xml.gz")
- | [uri] ->
- rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml")
- | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log)
- with Invalid_URI uri ->
- log := `T ("Ignoring invalid RDF URI: '" ^ l) :: !log)
- (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *)
- log := `T "All done" :: !log)
- | None -> ());
- (match xsl_index with
- | Some xsl_index ->
- (log := `T "Updating XSLT db ..." :: !log;
- List.iter
- (fun l -> xsl_map#replace l (server_url ^ "/" ^ l))
- (Pcre.split ~rex:index_sep_RE xsl_index);
- log := `T "All done" :: !log)
- | None -> ());
- debug_print "done with this server";
- !log
-
-let update_from_all_servers () = (* use global maps *)
- clear_maps ();
- let log =
- List.fold_left
- update_from_server
- [] (* initial logmsg: empty *)
- (* reverse order: 1st server is the most important one *)
- (List.map snd (List.rev (Http_getter_env.servers ())))
- in
- sync_maps ();
- `Msg (`L (List.rev log))
-
-let update_from_one_server server_url =
- let log = update_from_server [] server_url in
- `Msg (`L (List.rev log))
-
-let temp_file_of_uri uri =
- let flat_string s s' c =
- let cs = String.copy s in
- for i = 0 to (String.length s) - 1 do
- if String.contains s' s.[i] then cs.[i] <- c
- done;
- cs
- in
- let user = try Unix.getlogin () with _ -> "" in
- Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') ""
-
-(* API *)
-
-let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ())
-
-let resolve uri =
- try
- (map_of_uri uri)#resolve uri
- with Http_getter_map.Key_not_found _ -> raise (Unresolvable_URI uri)
-
- (* Warning: this fail if uri is already registered *)
-let register ~uri ~url = (map_of_uri uri)#add uri url
-
-let update () = update_from_all_servers ()
-
-let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri =
- let url = resolve uri in
- let (fname, outchan) = temp_file_of_uri uri in
- Http_getter_cache.respond_xml ~uri ~url ~enc:format ~patch:patch_dtd outchan;
- close_out outchan;
- fname
-
-let getxslt ?(patch_dtd = true) uri =
- let url = resolve uri in
- let (fname, outchan) = temp_file_of_uri uri in
- Http_getter_cache.respond_xsl ~url ~patch:patch_dtd outchan;
- close_out outchan;
- fname
-
-let getdtd ?(patch_dtd = true) uri =
- let url = Http_getter_env.dtd_dir ^ "/" ^ uri in
- let (fname, outchan) = temp_file_of_uri uri in
- Http_getter_cache.respond_dtd ~url ~patch:patch_dtd outchan;
- close_out outchan;
- fname
-
-let clean_cache () = Http_getter_cache.clean ()
-
-let list_servers () = Http_getter_env.servers ()
-
-let add_server ?(position = 0) name =
- if position = 0 then begin
- Http_getter_env.add_server ~position:0 name;
- update_from_one_server name (* quick update (new server only) *)
- end else if position > 0 then begin
- Http_getter_env.add_server ~position name;
- update ()
- end else (* already checked bt parse_position *)
- assert false
-
-let remove_server position =
- let server_name =
- try
- List.assoc position (Http_getter_env.servers ())
- with Not_found ->
- raise (Invalid_argument (sprintf "no server with position %d" position))
- in
- Http_getter_env.remove_server position;
- update ()
-
-let return_uris map filter =
- let uris = ref [] in
- map#iter (fun uri _ -> if filter uri then uris := uri :: !uris);
- List.rev !uris
-
-let getalluris () =
- let filter uri =
- (Pcre.pmatch ~rex:heading_cic_RE uri) &&
- not (Pcre.pmatch ~rex:trailing_types_RE uri)
- in
- return_uris cic_map filter
-
-let getallrdfuris classs =
- let filter =
- let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in
- match classs with
- | `Forward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "forward") uri)
- | `Backward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "backward") uri)
- in
- return_uris rdf_map filter
-
-let ls =
- let (++) (oldann, oldtypes, oldbody, oldtree)
- (newann, newtypes, newbody, newtree) =
- ((if newann > oldann then newann else oldann),
- (if newtypes > oldtypes then newtypes else oldtypes),
- (if newbody > oldbody then newbody else oldbody),
- (if newtree > oldtree then newtree else oldtree))
- in
- let basepart_RE =
- Pcre.regexp
- "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$"
- in
- let (types_RE, types_ann_RE, body_RE, body_ann_RE,
- proof_tree_RE, proof_tree_ann_RE) =
- (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$",
- Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$",
- Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$")
- in
- let (slash_RE, til_slash_RE, no_slashes_RE) =
- (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$")
- in
- fun lsuri ->
- let pat =
- "^" ^
- (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p))
- in
- let (dir_RE, obj_RE) =
- (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)"))
- in
- let dirs = ref StringSet.empty in
- let objs = Hashtbl.create 17 in
- let store_dir d =
- dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs
- in
- let store_obj o =
- let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in
- let no_flags = false, No, No, No in
- let oldflags =
- try
- Hashtbl.find objs basepart
- with Not_found -> (* no ann, no types, no body, no proof tree *)
- no_flags
- in
- let newflags =
- match o with
- | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No)
- | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No)
- | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No)
- | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No)
- | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes)
- | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann)
- | s -> no_flags
- in
- Hashtbl.replace objs basepart (oldflags ++ newflags)
- in
- cic_map#iter
- (* BLEARGH Dbm module lacks support for fold-like functions *)
- (fun key _ ->
- match key with
- | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *)
- let localpart = Pcre.replace ~rex:dir_RE uri in
- if Pcre.pmatch ~rex:no_slashes_RE localpart then
- store_obj localpart
- else
- store_dir localpart
- | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *)
- store_obj (Pcre.replace ~rex:til_slash_RE uri)
- | uri -> () (* miss *));
- let ls_items = ref [] in
- StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs;
- Http_getter_misc.hashtbl_sorted_iter
- (fun uri (annflag, typesflag, bodyflag, treeflag) ->
- ls_items :=
- Ls_object {
- uri = uri; ann = annflag;
- types = typesflag; body = typesflag; proof_tree = treeflag
- } :: !ls_items)
- objs;
- List.rev !ls_items
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Http_getter_types
-
- (** {2 Getter Web Service interface as API *)
-
-val help: unit -> string
-val resolve: string -> string (* uri -> url *)
-val register: uri:string -> url:string -> unit
-val update: unit -> Ui_logger.html_msg
-val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string
-val getxslt : ?patch_dtd:bool -> string -> string
-val getdtd : ?patch_dtd:bool -> string -> string
-val clean_cache: unit -> unit
-val list_servers: unit -> (int * string) list
-val add_server: ?position:int -> string -> Ui_logger.html_msg
-val remove_server: int -> Ui_logger.html_msg
-val getalluris: unit -> string list
-val getallrdfuris: [ `Forward | `Backward ] -> string list
-val ls: xml_uri -> ls_item list
-
- (** {2 Misc} *)
-
-val close_maps: unit -> unit
-val update_from_one_server: string -> Ui_logger.html_msg
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-(* TODO cache expires control!!! *)
-(* TODO uwobo loop:
- if two large proof (already in cache) are requested at the same time by two
- clients, uwobo (java implementation, not yet tested with the OCaml one)
- starts looping sending output to one of the client *)
-
-open Http_getter_common;;
-open Http_getter_debugger;;
-open Http_getter_misc;;
-open Http_getter_types;;
-open Printf;;
-
- (* expose ThreadSafe.threadSafe methods *)
-class threadSafe =
- object
- inherit ThreadSafe.threadSafe
- method virtual doCritical : 'a. 'a lazy_t -> 'a
- method virtual doReader : 'a. 'a lazy_t -> 'a
- method virtual doWriter : 'a. 'a lazy_t -> 'a
- end
-;;
-let threadSafe = new threadSafe ;;
-
-let resource_type_of_url = function
- | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped
- | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal
- | url -> raise (Invalid_URL url)
-let extension_of_resource_type = function
- | Enc_normal -> "xml"
- | Enc_gzipped -> "xml.gz"
-
- (* basename = resource name without trailing ".gz", if any *)
-let is_in_cache basename =
- Sys.file_exists
- (match Http_getter_env.cache_mode with
- | Enc_normal -> basename
- | Enc_gzipped -> basename ^ ".gz")
-
-let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan =
- let resource_type = resource_type_of_url url in
- let extension = extension_of_resource_type resource_type in
- let downloadname =
- match uri_of_string uri with (* parse uri *)
- | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
- (* assumption: baseuri starts with "/" *)
- sprintf "%s%s.%s" Http_getter_env.cic_dir baseuri extension
- | Nuprl_uri baseuri ->
- (* assumption: baseuri starts with "/" *)
- sprintf "%s%s.%s" Http_getter_env.nuprl_dir baseuri extension
- | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
- | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
- let escaped_prefix =
- (Pcre.replace ~pat:"/" ~templ:"_"
- (Pcre.replace ~pat:"_" ~templ:"__"
- (prefix ^
- (match qbaseuri with
- | Cic _ -> "//cic:"
- | Theory _ -> "//theory:"))))
- in
- sprintf "%s/%s%s.%s"
- Http_getter_env.rdf_dir escaped_prefix baseuri extension
- in
- let patch_fun =
- if patch then Http_getter_common.patch_xml else (fun x -> x)
- in
- let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
- let contype = "text/xml" in
- (* Fill cache if needed and return a short circuit file.
- Short circuit is needed in situations like:
- resource type = normal, cache type = gzipped, required encoding = normal
- we would like to avoid normal -> gzipped -> normal conversions. To avoid
- this tmp_short_circuit is used to remember the name of the intermediate
- file name *)
- let fill_cache () =
- threadSafe#doWriter (lazy(
- if not (is_in_cache basename) then begin (* cache MISS *)
- debug_print "Cache MISS :-(";
- mkdir ~parents:true (Filename.dirname downloadname);
- match (resource_type, Http_getter_env.cache_mode) with
- | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
- wget ~output:downloadname url;
- None
- | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *)
- let tmp = tempfile () in
- wget ~output:tmp url;
- gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
- if enc = Enc_normal then (* user wants normal: don't delete it! *)
- Some (tmp, enc)
- else begin
- Sys.remove tmp;
- None
- end
- | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *)
- let tmp = tempfile () in
- wget ~output:tmp url;
- gunzip ~output:basename ~keep:true tmp; (* fill cache *)
- if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *)
- Some (tmp, enc)
- else begin
- Sys.remove tmp;
- None
- end
- end else begin
- debug_print "Cache HIT :-)";
- None
- end
- )) in
- let tmp_short_circuit = fill_cache () in
- threadSafe#doReader (lazy(
- assert (is_in_cache basename);
- match (enc, Http_getter_env.cache_mode) with
- | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
- (* resource in cache is already in the required format *)
- (match enc with
- | Enc_normal ->
- debug_print "No format mangling required (encoding = normal)";
- return_file ~fname:basename ~contype ~patch_fun outchan
- | Enc_gzipped ->
- debug_print "No format mangling required (encoding = gzipped)";
- return_file
- ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
- outchan)
- | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal ->
- (match tmp_short_circuit with
- | None -> (* no short circuit possible, use cache *)
- debug_print "No short circuit available, use cache";
- let tmp = tempfile () in
- (match enc with
- | Enc_normal ->
- (* required format is normal, cached version is gzipped *)
- gunzip (* gunzip to tmp *)
- ~output:tmp ~keep:true (basename ^ ".gz");
- return_file ~fname:tmp ~contype ~patch_fun outchan;
- | Enc_gzipped ->
- (* required format is gzipped, cached version is normal *)
- gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
- return_file
- ~fname:tmp ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
- outchan);
- Sys.remove tmp
- | Some (fname, Enc_normal) -> (* short circuit available, use it! *)
- debug_print "Using short circuit (encoding = normal)";
- return_file ~fname ~contype ~patch_fun outchan;
- Sys.remove fname
- | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *)
- debug_print "Using short circuit (encoding = gzipped)";
- return_file
- ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan;
- Sys.remove fname)
- ))
-;;
-
- (* TODO enc is not yet supported *)
-let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan =
- let patch_fun =
- if patch then Http_getter_common.patch_xsl else (fun x -> x)
- in
- let fname = tempfile () in
- wget ~output:fname url;
- return_file ~fname ~contype:"text/xml" ~patch_fun outchan;
- Sys.remove fname
-;;
-
- (* TODO enc is not yet supported *)
-let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan =
- let patch_fun =
- if patch then Http_getter_common.patch_dtd else (fun x -> x)
- in
- if Sys.file_exists url then
- (* TODO check this: old getter here used text/xml *)
- return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan
- else
- return_html_error ("Can't find DTD: " ^ url) outchan
-;;
-
-let clean () =
- let module E = Http_getter_env in
- List.iter
- (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*"))
- ) [E.cic_dir ; E.nuprl_dir ; E.rdf_dir ]
-;;
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Http_getter_types;;
-
-val respond_xml:
- ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> out_channel ->
- unit
-
-val respond_xsl:
- ?enc:encoding -> ?patch:bool -> url:string -> out_channel ->
- unit
-
-val respond_dtd:
- ?enc:encoding -> ?patch:bool -> url:string -> out_channel ->
- unit
-
-val clean: unit -> unit
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Http_getter_types;;
-open Printf;;
-
-let string_of_ls_flag = function No -> "NO" | Yes -> "YES" | Ann -> "ANN"
-let string_of_encoding = function
- | Enc_normal -> "Normal"
- | Enc_gzipped -> "GZipped"
-
-let is_cic_obj_uri uri = Pcre.pmatch ~pat:"^cic:" uri
-let is_theory_uri uri = Pcre.pmatch ~pat:"^theory:" uri
-let is_cic_uri uri = is_cic_obj_uri uri || is_theory_uri uri
-let is_nuprl_uri uri = Pcre.pmatch ~pat:"^nuprl:" uri
-let is_rdf_uri uri = Pcre.pmatch ~pat:"^helm:rdf(.*):(.*)//(.*)" uri
-let is_xsl_uri uri = Pcre.pmatch ~pat:"^\\w+\\.xsl" uri
-
-let rec uri_of_string = function
- | uri when is_rdf_uri uri ->
- (match Pcre.split ~pat:"//" uri with
- | [ prefix; uri ] ->
- let rest =
- match uri_of_string uri with
- | Cic_uri xmluri -> xmluri
- | _ -> raise (Invalid_URI uri)
- in
- Rdf_uri (prefix, rest)
- | _ -> raise (Invalid_URI uri))
- | uri when is_cic_uri uri -> Cic_uri (Cic (Pcre.replace ~pat:"^cic:" uri))
- | uri when is_nuprl_uri uri -> Nuprl_uri (Pcre.replace ~pat:"^nuprl:" uri)
- | uri when is_theory_uri uri ->
- Cic_uri (Theory (Pcre.replace ~pat:"^theory:" uri))
- | uri -> raise (Invalid_URI uri)
-
-let patch_xml line =
- Pcre.replace
- ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" Http_getter_env.dtd_base_url)
- ~templ:(
- sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri=" Http_getter_env.my_own_url)
- line
-let patch_xsl =
- let mk_patch_fun tag line =
- Pcre.replace
- ~pat:(sprintf "%s\\s+href=\"" tag)
- ~templ:(
- sprintf "%s href=\"%s/getxslt?uri=" tag Http_getter_env.my_own_url)
- line
- in
- let (patch_import, patch_include) =
- (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include")
- in
- fun line -> patch_include (patch_import line)
-let patch_dtd line =
- Pcre.replace
- ~pat:"ENTITY (.*) SYSTEM\\s+\""
- ~templ:(
- sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri=" Http_getter_env.my_own_url)
- line
-
-let pp_error s =
- sprintf "<html><body>Http Getter error: %s</body></html>" s
-let pp_internal_error s =
- sprintf "<html><body>Http Getter Internal error: %s</body></html>" s
-let pp_msg s = sprintf "<html><body>%s</body></html>" s
-let null_pp s = s
-
-let mk_return_fun pp_fun contype msg outchan =
- Http_daemon.respond
- ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan
-
-let return_html_error = mk_return_fun pp_error "text/html"
-let return_html_internal_error = mk_return_fun pp_internal_error "text/html"
-let return_html_msg = mk_return_fun pp_msg "text/html"
-let return_html_raw = mk_return_fun null_pp "text/html"
-let return_xml_raw = mk_return_fun null_pp "text/xml"
-let return_file
- ~fname ?contype ?contenc ?(patch_fun = fun x -> x) ?(gunzip = false) outchan
- =
- let headers =
- match (contype, contenc) with
- | (Some t, Some e) -> ["Content-Encoding", e; "Content-Type", t]
- | (Some t, None) -> ["Content-Type" , t]
- | (None, Some e) -> ["Content-Encoding", e]
- | (None, None) -> []
- in
- Http_daemon.send_basic_headers ~code:200 outchan;
- Http_daemon.send_headers headers outchan;
- Http_daemon.send_CRLF outchan;
- if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to
- it, compress the result and sent it to client *)
- let (tmp1, tmp2) =
- (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ())
- in
- Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname; (* gunzip to tmp1 *)
- let new_file = open_out tmp2 in
- Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *)
- (fun line -> output_string new_file (patch_fun line ^ "\n"))
- tmp1;
- close_out new_file;
- Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *)
- Http_getter_misc.iter_file (* send tmp1 to client as is*)
- (fun line -> output_string outchan (line ^ "\n"))
- tmp1;
- Sys.remove tmp1 (* rm tmp1 *)
- end else (* no need to gunzip, apply patch_fun directly to file *)
- Http_getter_misc.iter_file
- (fun line -> output_string outchan (patch_fun line ^ "\n"))
- fname
-;;
-let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Http_getter_types;;
-
-val string_of_ls_flag: ls_flag -> string
-val string_of_encoding: encoding -> string
-
-val is_cic_uri: string -> bool
-val is_nuprl_uri: string -> bool
-val is_rdf_uri: string -> bool
-val is_xsl_uri: string -> bool
-
-val uri_of_string: string -> uri
-
-val patch_xml : string -> string
-val patch_xsl : string -> string
-val patch_dtd : string -> string
-
-val return_html_error: string -> out_channel -> unit
-val return_html_internal_error: string -> out_channel -> unit
- (** return an HTML HTTP response from the given string, embedding it in an
- "H1" element of an HTML page; content-type is set to text/html *)
-val return_html_msg: string -> out_channel -> unit
- (** return an HTTP response using given string as content; content-type is set
- to text/html *)
-val return_html_raw: string -> out_channel -> unit
- (** return an HTTP response using given string as content; content-type is set
- to text/xml *)
-val return_xml_raw: string -> out_channel -> unit
- (** return a bad request http response *)
-val return_400: string -> out_channel -> unit
- (**
- @param fname name of the file to be sent
- @param contype Content-Type header value
- @param contenc Content-Enconding header value
- @param patch_fun function used to patch file contents
- @param gunzip is meaningful only if a patch function is provided. If gunzip
- is true patch_fun is applied to the uncompressed version of the file. The file
- is then compressed again and send to client
- @param outchan output channel over which sent file fname *)
-val return_file:
- fname:string ->
- ?contype:string -> ?contenc:string ->
- ?patch_fun:(string -> string) -> ?gunzip:bool ->
- out_channel ->
- unit
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Printf;;
-
-let version = "0.3.0"
-let conffile = "http_getter.conf.xml"
-
- (* TODO provide a better usage string *)
-let usage_string configuration =
- sprintf
-"
-<html>
- <head>
- <title>HTTP Getter's help message</title>
- </head>
- <body>
- <h1>HTTP Getter, version %s</h1>
- <h2>Usage information</h2>
- <p>
- Usage: <kbd>http://hostname:getterport/</kbd><em>command</em>
- </p>
- <p>
- Available commands:
- </p>
- <p>
- <b><kbd>help</kbd></b><br />
- display this help message
- </p>
- <p>
- <b><kbd>getxml?uri=URI[&format=(normal|gz)][&patch_dtd=(yes|no)]</kbd></b><br />
- </p>
- <p>
- <b><kbd>register?uri=URI&url=URL</kbd></b><br />
- </p>
- <p>
- <b><kbd>resolve?uri=URI</kbd></b><br />
- </p>
- <p>
- <b><kbd>getdtd?uri=URI[&patch_dtd=(yes|no)]</kbd></b><br />
- </p>
- <p>
- <b><kbd>getxslt?uri=URI[&patch_dtd=(yes|no)]</kbd></b><br />
- </p>
- <p>
- <b><kbd>list_servers</kbd></b><br />
- </p>
- <p>
- <b><kbd>add_server?url=URL&position=POSITION</kbd></b><br />
- </p>
- <p>
- <b><kbd>remove_server?position=POSITION</kbd></b><br />
- </p>
- <p>
- <b><kbd>update</kbd></b><br />
- </p>
- <p>
- <b><kbd>clean_cache</kbd></b><br />
- </p>
- <p>
- <b><kbd>getalluris</kbd></b><br />
- </p>
- <p>
- <b><kbd>getallrdfuris</kbd></b><br />
- </p>
- <p>
- <b><kbd>ls?baseuri=URI&format=(txt|xml)</kbd></b><br />
- </p>
- <p>
- <b><kbd>getempty</kbd></b><br />
- </p>
- <h2>Current configuration</h2>
- <pre>%s</pre>
- </body>
-</html>
-"
- version configuration
-
-let empty_xml =
-"<?xml version=\"1.0\"?>
-<!DOCTYPE empty [
- <!ELEMENT empty EMPTY>
-]>
-<empty />
-"
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-val version: string
-val conffile: string
-val empty_xml: string
-
- (** @return an HTML usage string including configuration information passed as
- input parameter *)
-val usage_string: string -> string
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-let debug = ref true
-
-(* invariant: if logfile is set, then logchan is set too *)
-let logfile = ref None
-let logchan = ref None
-
-let set_logfile f =
- (match !logchan with None -> () | Some oc -> close_out oc);
- match f with
- | Some f ->
- logfile := Some f;
- logchan := Some (open_out f)
- | None ->
- logfile := None;
- logchan := None
-
-let get_logfile () = !logfile
-
-let close_logfile () = set_logfile None
-
-let debug_print s =
- let msg = "[HTTP-Getter] " ^ s in
- if !debug then
- match (!logfile, !logchan) with
- | None, _ -> prerr_endline msg
- | Some fname, Some oc ->
- output_string oc msg;
- flush oc
- | Some _, None -> assert false
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
- (** enable/disable debugging messages *)
-val debug: bool ref
-
- (** output a debugging message *)
-val debug_print: string -> unit
-
- (** if set to Some fname, fname will be used as a logfile, otherwise stderr
- * will be used *)
-val get_logfile: unit -> string option
-val set_logfile: string option -> unit
-val close_logfile: unit -> unit
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Printf
-open Pxp_document
-open Pxp_types
-open Pxp_yacc
-
-open Http_getter_types
-
-let version = Http_getter_const.version
-
-type setting_src =
- | Environment (* read setting from environment variables *)
- | Conffile (* read setting from configuration file *)
- | Both (* read setting from both; environment override config file *)
-
-let conf_file_tree = ref None
-
-let (conf_file, conf_dir) =
- try
- let conf_dir =
- Pcre.replace ~pat:"/$" (Sys.getenv "HELM_CONFIGURATION_DIR")
- in
- (conf_dir ^ "/" ^ Http_getter_const.conffile, conf_dir)
- with Not_found -> failwith "HELM_CONFIGURATION_DIR undefined"
-
-let safe_getenv ?(from = Both) var =
- (let rec read_from_file () =
- (match !conf_file_tree with
- | None ->
- conf_file_tree :=
- Some
- (parse_wfcontent_entity
- default_config (from_file conf_file) default_spec);
- read_from_file ()
- | Some t ->
- (try
- Some (find_element (String.lowercase var) t)#data
- with Not_found -> None))
- in
- let read_from_env () = try Some (Sys.getenv var) with Not_found -> None in
- let return_value name = function
- | Some v -> v
- | None -> failwith ("Setting " ^ name ^ " is not defined")
- in
- (match from with
- | Environment -> return_value var (read_from_env ())
- | Conffile -> return_value var (read_from_file ())
- | Both ->
- (match read_from_env () with
- | None -> return_value var (read_from_file ())
- | v -> return_value var v)))
-
-let servers_file = safe_getenv "HTTP_GETTER_SERVERS_FILE"
-
-let load_servers () =
- let pos = ref (-1) in
- List.rev (Http_getter_misc.fold_file
- (fun line servers ->
- if Http_getter_misc.is_blank_line line then
- servers
- else
- (incr pos; (!pos, line) :: servers))
- []
- servers_file)
-
-let _servers = ref (load_servers ())
-let servers () = !_servers
-
-let save_servers () =
- let oc = open_out servers_file in
- List.iter (fun (_,server) -> output_string oc (server ^ "\n")) (servers ());
- close_out oc
-let reload_servers () = _servers := load_servers ()
-
-let cic_dbm = safe_getenv "HTTP_GETTER_CIC_DBM"
-let nuprl_dbm = safe_getenv "HTTP_GETTER_NUPRL_DBM"
-let rdf_dbm = safe_getenv "HTTP_GETTER_RDF_DBM"
-let xsl_dbm = safe_getenv "HTTP_GETTER_XSL_DBM"
-let xml_index = safe_getenv "HTTP_GETTER_XML_INDEXNAME"
-let rdf_index = safe_getenv "HTTP_GETTER_RDF_INDEXNAME"
-let xsl_index = safe_getenv "HTTP_GETTER_XSL_INDEXNAME"
-let cic_dir = safe_getenv "HTTP_GETTER_CIC_DIR"
-let nuprl_dir = safe_getenv "HTTP_GETTER_NUPRL_DIR"
-let rdf_dir = safe_getenv "HTTP_GETTER_RDF_DIR"
-let dtd_dir = safe_getenv "HTTP_GETTER_DTD_DIR"
-
-let port =
- let port = safe_getenv "HTTP_GETTER_PORT" in
- try
- int_of_string port
- with Failure "int_of_string" ->
- failwith ("Invalid port value: " ^ port)
-let host =
- let buf = Buffer.create 20 in
- Shell.call ~stdout:(Shell.to_buffer buf) [Shell.cmd "hostname" ["-f"]];
- Pcre.replace ~pat:"\n+$" (Buffer.contents buf)
-let my_own_url =
- sprintf
- "http://%s%s" (* without trailing '/' *)
- host
- (if port = 80 then "" else (sprintf ":%d" port))
-let dtd_base_url = safe_getenv "HTTP_GETTER_DTD_BASE_URL"
-
-let cache_mode =
- match String.lowercase (safe_getenv "HTTP_GETTER_CACHE_MODE") with
- | "normal" -> Enc_normal
- | "gz" -> Enc_gzipped
- | mode -> failwith ("Invalid cache mode: " ^ mode)
-
-let reload () = reload_servers ()
-
-let env_to_string () =
- sprintf
-"HTTP Getter %s (the OCaml one!)
-
-cic_dbm:\t%s
-nuprl_dbm:\t%s
-rdf_dbm:\t%s
-xsl_dbm:\t%s
-xml_index:\t%s
-rdf_index:\t%s
-xsl_index:\t%s
-cic_dir:\t%s
-nuprl_dir:\t%s
-rdf_dir:\t%s
-dtd_dir:\t%s
-servers_file:\t%s
-host:\t\t%s
-port:\t\t%d
-my_own_url:\t%s
-dtd_base_url:\t%s
-cache_mode:\t%s
-conf_file:\t%s
-conf_dir:\t%s
-servers:
-\t%s
-"
- version cic_dbm nuprl_dbm rdf_dbm xsl_dbm xml_index rdf_index xsl_index
- cic_dir nuprl_dir rdf_dir dtd_dir servers_file host port my_own_url
- dtd_base_url
- (match cache_mode with Enc_normal -> "Normal" | Enc_gzipped -> "GZipped")
- conf_file conf_dir
- (String.concat "\n\t" (* (position * server) list *)
- (List.map (fun (pos, server) -> sprintf "%3d: %s" pos server)
- (servers ())))
-
-let add_server ?position url =
- (match position with
- | None ->
- _servers := !_servers @ [-1, url];
- | Some p when p > 0 ->
- let rec add_after pos = function
- | [] -> [-1, url]
- | hd :: tl when p = 1 -> hd :: (-1, url) :: tl
- | hd :: tl (* when p > 1 *) -> hd :: (add_after (pos - 1) tl)
- in
- _servers := add_after p !_servers
- | Some _ -> assert false);
- save_servers ();
- reload_servers ()
-
-let remove_server position =
- _servers := List.remove_assoc position !_servers;
- save_servers ();
- reload_servers ()
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Http_getter_types;;
-
- (* {2 general information} *)
-
-val version : string (* getter version *)
-
- (* {2 environment gathered data} *)
-
-val cic_dbm : string (* XML map DBM file for CIC *)
-val nuprl_dbm : string (* XML map DBM file for NuPRL *)
-val rdf_dbm : string (* RDF map DBM file *)
-val xsl_dbm : string (* XSL map DBM file *)
-val xml_index : string (* XMLs' index *)
-val rdf_index : string (* RDFs' index *)
-val xsl_index : string (* XSLTs' index *)
-val cic_dir : string (* XMLs' directory for CIC*)
-val nuprl_dir : string (* XMLs' directory for NuPRL*)
-val rdf_dir : string (* RDFs' directory *)
-val dtd_dir : string (* DTDs' root directory *)
-val servers_file : string (* servers.txt file *)
-val port : int (* port on which getter listens *)
-val dtd_base_url : string (* base URL for DTD downloading *)
-
- (* {2 derived data} *)
-
-val host : string (* host on which getter listens *)
-val my_own_url : string (* URL at which contact getter *)
-val servers : unit -> (int * string) list
- (* (position * server) list *)
-val cache_mode : encoding (* cached files encoding *)
-val conf_file : string (* configuration file's full path *)
-val conf_dir : string (* directory where conf_file resides *)
-
- (* {2 dynamic configuration changes} *)
-
- (* add a server to servers list in a given position (defaults to "after the
- last server", change servers file accordingly and reload servers list *)
-val add_server: ?position:int -> string -> unit
- (* remove a server from servers list, change servers file accordingly and
- reload servers list *)
-val remove_server: int -> unit
-
- (* {2 misc} *)
-
-val reload: unit -> unit (* reload configuration information *)
-val env_to_string : unit -> string (* dump a textual representation of the
- current http_getter settings on an output
- channel *)
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-exception Key_already_in of string;;
-exception Key_not_found of string;;
-
-class map dbname =
- let perm = 420 in (* permission 644 in decimal notation *)
- let open_dbm () = Dbm.opendbm dbname [ Dbm.Dbm_rdwr; Dbm.Dbm_create ] perm in
-
- object (self)
-
- inherit ThreadSafe.threadSafe
-
- val mutable db = open_dbm ()
-
- (*initializer Gc.finalise Dbm.close db (* close db on GC *)*)
-
- method add key value =
- self#doWriter (lazy (
- try
- Dbm.add db key value
- with Dbm.Dbm_error "Entry already exists" -> raise (Key_already_in key)
- ))
-
- method replace key value =
- self#doWriter (lazy (
- Dbm.replace db key value
- ))
-
- method remove key =
- self#doWriter (lazy (
- try
- Dbm.remove db key
- with Dbm.Dbm_error "dbm_delete" -> raise (Key_not_found key)
- ))
-
- method resolve key =
- self#doReader (lazy (
- try Dbm.find db key with Not_found -> raise (Key_not_found key)
- ))
-
- method iter (f: string -> string -> unit) =
- self#doReader (lazy (
- Dbm.iter f db
- ))
-
- method sync =
- self#doWriter (lazy (
- Dbm.close db;
- db <- open_dbm ()
- ))
-
- method clear =
- self#doWriter (lazy (
- Dbm.close db;
- List.iter
- (fun ext ->
- let file = dbname ^ ext in
- if Sys.file_exists file then Sys.remove file)
- [".dir"; ".pag"; ".db"];
- db <- open_dbm ()
- ))
-
- method close =
- self#doWriter (lazy (
- Dbm.close db
- ))
-
- end
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-exception Key_already_in of string
-exception Key_not_found of string
-
-class map:
- string ->
- object
- method add: string -> string -> unit
- method replace: string -> string -> unit
- method remove: string -> unit
- method resolve: string -> string
- method iter: (string -> string -> unit) -> unit
- method sync: unit
- method clear: unit
-
- method close: unit (* use with caution! *)
- end
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Printf
-
-open Http_getter_debugger
-
-let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
-let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
-let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
-let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
-let dir_sep_RE = Pcre.regexp "/"
-let heading_slash_RE = Pcre.regexp "^/"
-
-let bufsiz = 16384 (* for file system I/O *)
-let tcp_bufsiz = 4096 (* for TCP I/O *)
-
-let fold_file f init fname =
- let ic = open_in fname in
- let rec aux acc =
- let line = try Some (input_line ic) with End_of_file -> None in
- match line with
- | None -> acc
- | Some line -> aux (f line acc)
- in
- let res = try aux init with e -> close_in ic; raise e in
- close_in ic;
- res
-
-let iter_file f = fold_file (fun line _ -> f line) ()
-
-let hashtbl_sorted_fold f tbl init =
- let sorted_keys =
- List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
- in
- List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys
-
-let hashtbl_sorted_iter f tbl =
- let sorted_keys =
- List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
- in
- List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys
-
-let cp src dst =
- let (ic, oc) = (open_in src, open_out dst) in
- let buf = String.create bufsiz in
- (try
- while true do
- let bytes = input ic buf 0 bufsiz in
- if bytes = 0 then raise End_of_file else output oc buf 0 bytes
- done
- with End_of_file -> ());
- close_in ic; close_out oc
-
-let parse_url url =
- try
- let subs =
- Pcre.extract ~rex:url_RE (Pcre.replace ~rex:http_scheme_RE url)
- in
- (subs.(1),
- (if subs.(2) = "" then 80 else int_of_string subs.(3)),
- (if subs.(4) = "" then "/" else subs.(4)))
- with exc ->
- failwith
- (sprintf "Can't parse url: %s (exception: %s)"
- url (Printexc.to_string exc))
-let init_socket addr port =
- let inet_addr = (Unix.gethostbyname addr).Unix.h_addr_list.(0) in
- let sockaddr = Unix.ADDR_INET (inet_addr, port) in
- let suck = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
- Unix.connect suck sockaddr;
- let outchan = Unix.out_channel_of_descr suck in
- let inchan = Unix.in_channel_of_descr suck in
- (inchan, outchan)
-let http_get_iter_buf ~callback url =
- let (address, port, path) = parse_url url in
- let buf = String.create tcp_bufsiz in
- let (inchan, outchan) = init_socket address port in
- output_string outchan (sprintf "GET %s\r\n" path);
- flush outchan;
- (try
- while true do
- match input inchan buf 0 tcp_bufsiz with
- | 0 -> raise End_of_file
- | bytes when bytes = tcp_bufsiz -> (* buffer full, no need to slice it *)
- callback buf
- | bytes when bytes < tcp_bufsiz -> (* buffer not full, slice it *)
- callback (String.sub buf 0 bytes)
- | _ -> (* ( bytes < 0 ) || ( bytes > tcp_bufsiz ) *)
- assert false
- done
- with End_of_file -> ());
- close_in inchan (* close also outchan, same fd *)
-
-let wget ?output url =
- debug_print
- (sprintf "wgetting %s (output: %s)" url
- (match output with None -> "default" | Some f -> f));
- match url with
- | url when Pcre.pmatch ~rex:file_scheme_RE url -> (* file:// *)
- (let src_fname = Pcre.replace ~rex:file_scheme_RE url in
- match output with
- | Some dst_fname -> cp src_fname dst_fname
- | None ->
- let dst_fname = Filename.basename src_fname in
- if src_fname <> dst_fname then
- cp src_fname dst_fname
- else (* src and dst are the same: do nothing *)
- ())
- | url when Pcre.pmatch ~rex:http_scheme_RE url -> (* http:// *)
- (let oc =
- open_out (match output with Some f -> f | None -> Filename.basename url)
- in
- http_get_iter_buf ~callback:(fun data -> output_string oc data) url;
- close_out oc)
- | scheme -> (* unsupported scheme *)
- failwith ("Http_getter_misc.wget: unsupported scheme: " ^ scheme)
-
-let gzip ?(keep = false) ?output fname =
- let output = match output with None -> fname ^ ".gz" | Some fname -> fname in
- debug_print (sprintf "gzipping %s (keep: %b, output: %s)" fname keep output);
- let (ic, oc) = (open_in fname, Gzip.open_out output) in
- let buf = String.create bufsiz in
- (try
- while true do
- let bytes = input ic buf 0 bufsiz in
- if bytes = 0 then raise End_of_file else Gzip.output oc buf 0 bytes
- done
- with End_of_file -> ());
- close_in ic; Gzip.close_out oc;
- if not keep then Sys.remove fname
-;;
-
-let gunzip ?(keep = false) ?output fname =
- (* assumption: given file name ends with ".gz" or output is set *)
- let output =
- match output with
- | None ->
- if (Pcre.pmatch ~rex:trailing_dot_gz_RE fname) then
- Pcre.replace ~rex:trailing_dot_gz_RE fname
- else
- failwith
- "Http_getter_misc.gunzip: unable to determine output file name"
- | Some fname -> fname
- in
- debug_print (sprintf "gunzipping %s (keep: %b, output: %s)"
- fname keep output);
- let (ic, oc) = (Gzip.open_in fname, open_out output) in
- let buf = String.create bufsiz in
- (try
- while true do
- let bytes = Gzip.input ic buf 0 bufsiz in
- if bytes = 0 then raise End_of_file else Pervasives.output oc buf 0 bytes
- done
- with End_of_file -> ());
- Gzip.close_in ic; close_out oc;
- if not keep then Sys.remove fname
-;;
-
-let tempfile () = Filename.temp_file "http_getter_" ""
-
-exception Mkdir_failure of string * string;; (* dirname, failure reason *)
-let dir_perm = 0o755
-
-let mkdir ?(parents = false) dirname =
- let mkdirhier () =
- let (pieces, hd) =
- let split = Pcre.split ~rex:dir_sep_RE dirname in
- if Pcre.pmatch ~rex:heading_slash_RE dirname then
- (List.tl split, "/")
- else
- (split, "")
- in
- ignore
- (List.fold_left
- (fun pre dir ->
- let next_dir =
- sprintf "%s%s%s" pre (match pre with "/" | "" -> "" | _ -> "/") dir
- in
- (try
- (match (Unix.stat next_dir).Unix.st_kind with
- | Unix.S_DIR -> () (* dir component already exists, go on! *)
- | _ -> (* dir component already exists but isn't a dir, abort! *)
- raise
- (Mkdir_failure (dirname,
- sprintf "'%s' already exists but is not a dir" next_dir)))
- with Unix.Unix_error (Unix.ENOENT, "stat", _) ->
- (* dir component doesn't exists, create it and go on! *)
- Unix.mkdir next_dir dir_perm);
- next_dir)
- hd pieces)
- in
- if parents then mkdirhier () else Unix.mkdir dirname dir_perm
-
-let string_of_proc_status = function
- | Unix.WEXITED code -> sprintf "[Exited: %d]" code
- | Unix.WSIGNALED sg -> sprintf "[Killed: %d]" sg
- | Unix.WSTOPPED sg -> sprintf "[Stopped: %d]" sg
-
-let http_get url =
- if Pcre.pmatch ~rex:file_scheme_RE url then begin
- (* file:// URL. Read data from file system *)
- let fname = Pcre.replace ~rex:file_scheme_RE url in
- try
- let size = (Unix.stat fname).Unix.st_size in
- let buf = String.create size in
- let ic = open_in fname in
- really_input ic buf 0 size;
- close_in ic;
- Some buf
- with Unix.Unix_error (Unix.ENOENT, "stat", _) -> None
- end else (* other URL, pass it to Http_client *)
- try
- Some (Http_client.http_get url)
- with e ->
- prerr_endline (sprintf
- "Warning: Http_client failed on url %s with exception: %s"
- url (Printexc.to_string e));
- None
-
-let is_blank_line =
- let blank_line_RE = Pcre.regexp "(^#)|(^\\s*$)" in
- fun line ->
- Pcre.pmatch ~rex:blank_line_RE line
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
- (** 'mkdir' failed, arguments are: name of the directory to be created and
- failure reason *)
-exception Mkdir_failure of string * string
-
- (** "fold_left" like function on file lines, trailing newline is not passed to
- the given function *)
-val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a
- (* "iter" like function on file lines, trailing newline is not passed to the
- given function *)
-val iter_file : (string -> unit) -> string -> unit
-
- (** like Hashtbl.fold but keys are processed ordered *)
-val hashtbl_sorted_fold :
- ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c
- (** like Hashtbl.iter but keys are processed ordered *)
-val hashtbl_sorted_iter : ('a -> 'b -> unit) -> ('a, 'b) Hashtbl.t -> unit
-
- (** cp frontend *)
-val cp: string -> string -> unit
- (** wget frontend, if output is given it is the destination file, otherwise
- standard wget rules are used. Additionally this function support also the
- "file://" scheme for file system addressing *)
-val wget: ?output: string -> string -> unit
- (** gzip frontend. If keep = true original file will be kept, default is
- false. output is the file on which gzipped data will be saved, default is
- given file with an added ".gz" suffix *)
-val gzip: ?keep: bool -> ?output: string -> string -> unit
- (** gunzip frontend. If keep = true original file will be kept, default is
- false. output is the file on which gunzipped data will be saved, default is
- given file name without trailing ".gz" *)
-val gunzip: ?keep: bool -> ?output: string -> string -> unit
- (** tempfile frontend, return the name of created file. A special purpose
- suffix is used (actually "_http_getter" *)
-val tempfile: unit -> string
- (** mkdir frontend, if parents = true also parent directories will be created.
- If the given directory already exists doesn't act *)
-val mkdir: ?parents: bool -> string -> unit
-
- (** pretty printer for Unix.process_status values *)
-val string_of_proc_status : Unix.process_status -> string
-
- (** raw HTTP downloader, return Some the contents of downloaded resource or
- None if an error occured while downloading. This function support also
- "file://" scheme for filesystem resources *)
-val http_get: string -> string option
- (** 'iter' like method that iter over string slices (unspecified length) of a
- remote resources fetched via HTTP GET requests *)
-val http_get_iter_buf: callback:(string -> unit) -> string -> unit
-
- (** true on blanks-only and #-commented lines, false otherwise *)
-val is_blank_line: string -> bool
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-exception Bad_request of string
-exception Unresolvable_URI of string
-exception Invalid_URI of string
-exception Invalid_URL of string
-exception Invalid_RDF_class of string
-exception Internal_error of string
-
-type encoding = Enc_normal | Enc_gzipped
-type answer_format = Fmt_text | Fmt_xml
-type ls_flag = Yes | No | Ann
-type ls_object =
- {
- uri: string;
- ann: bool;
- types: ls_flag;
- body: ls_flag;
- proof_tree: ls_flag;
- }
-type ls_item =
- | Ls_section of string
- | Ls_object of ls_object
-
-type xml_uri =
- | Cic of string
- | Theory of string
-type rdf_uri = string * xml_uri
-type nuprl_uri = string
-type uri =
- | Cic_uri of xml_uri
- | Nuprl_uri of nuprl_uri
- | Rdf_uri of rdf_uri
-
-module StringSet = Set.Make (String)
-
(* constants *)
+let configuration_file = "http_getter.conf.xml"
+
let common_headers = [
"Cache-Control", "no-cache";
"Pragma", "no-cache";
<%s>
"
doctype
- Http_getter_env.my_own_url
+ (Lazy.force Http_getter_env.my_own_url)
doctype
doctype);
List.iter
| Fmt_xml ->
Buffer.add_string buf "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
bprintf buf "<!DOCTYPE ls SYSTEM \"%s/getdtd?uri=ls.dtd\">\n"
- Http_getter_env.my_own_url;
+ (Lazy.force Http_getter_env.my_own_url);
Buffer.add_string buf "<ls>\n";
List.iter
(function
~patch:(parse_patch req) outchan
| "/getdtd" ->
Http_getter_cache.respond_dtd ~patch:(parse_patch req)
- ~url:(Http_getter_env.dtd_dir ^ "/" ^ (req#param "uri")) outchan
+ ~url:(sprintf "%s/%s"
+ (Helm_registry.get "getter.dtd_dir") (req#param "uri"))
+ outchan
| "/resolve" -> return_resolve (req#param "uri") outchan
| "/register" ->
Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url");
| "/update" ->
Http_getter_env.reload (); (* reload servers list from servers file *)
let log = Http_getter.update () in
- return_html_msg (Ui_logger.html_of_html_msg log) outchan
+ return_html_msg (HelmLogger.html_of_html_msg log) outchan
| "/list_servers" -> return_list_servers outchan
| "/add_server" ->
let name = req#param "url" in
let log = Http_getter.add_server ~position name in
return_html_msg
(sprintf "Added server %s in position %d)<br />\n%s"
- name position (Ui_logger.html_of_html_msg log))
+ name position (HelmLogger.html_of_html_msg log))
outchan
| "/remove_server" ->
let position = parse_position req in
in
return_html_msg
(sprintf "Removed server at position %d<br />\n%s"
- position (Ui_logger.html_of_html_msg log))
+ position (HelmLogger.html_of_html_msg log))
outchan
| "/getalluris" -> return_all_xml_uris outchan
| "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan
(* Main *)
let main () =
+ Helm_registry.load_from configuration_file;
+ Http_getter_env.reload ();
print_string (Http_getter_env.env_to_string ());
flush stdout;
at_exit Http_getter.close_maps;
Sys.catch_break true;
try
- Http_daemon.start'
- ~timeout:(Some 600) ~port:Http_getter_env.port ~mode:`Thread callback
+ Http_daemon.start' ~mode:`Thread
+ ~timeout:(Some 600) ~port:(Helm_registry.get_int "getter.port")
+ callback
with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *)
let _ = main ()
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-open Http_getter_debugger;;
-let debug_print _ = ();; (* override debugger 'debug_print' *)
-
-class threadSafe =
- object (self)
-
- val mutex = Mutex.create ()
-
- (** condition variable: 'no readers is currently reading' *)
- val noReaders = Condition.create ()
-
- (** readers count *)
- val mutable readersCount = 0
-
- method private incrReadersCount = (* internal, not exported *)
- self#doCritical (lazy (
- readersCount <- readersCount + 1
- ))
-
- method private decrReadersCount = (* internal, not exported *)
- self#doCritical (lazy (
- if readersCount > 0 then readersCount <- readersCount - 1;
- ))
-
- method private signalNoReaders = (* internal, not exported *)
- self#doCritical (lazy (
- if readersCount = 0 then Condition.signal noReaders
- ))
-
- method private doCritical: 'a. 'a lazy_t -> 'a =
- fun action ->
- debug_print "<doCritical>";
- (try
- Mutex.lock mutex;
- let res = Lazy.force action in
- Mutex.unlock mutex;
- debug_print "</doCritical>";
- res
- with e ->
- Mutex.unlock mutex;
- raise e);
-
- method private doReader: 'a. 'a lazy_t -> 'a =
- fun action ->
- debug_print "<doReader>";
- let cleanup () =
- self#decrReadersCount;
- self#signalNoReaders
- in
- self#incrReadersCount;
- let res = (try Lazy.force action with e -> (cleanup (); raise e)) in
- cleanup ();
- debug_print "</doReader>";
- res
-
- (* TODO may starve!!!! is what we want or not? *)
- method private doWriter: 'a. 'a lazy_t -> 'a =
- fun action ->
- debug_print "<doWriter>";
- self#doCritical (lazy (
- while readersCount > 0 do
- Condition.wait noReaders mutex
- done;
- let res = Lazy.force action in
- debug_print "</doWriter>";
- res
- ))
-
- end
-
+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-class threadSafe:
- object
-
- (** execute 'action' in mutual exclusion between all other threads *)
- method private doCritical: 'a lazy_t -> 'a
-
- (** execute 'action' acting as a 'reader' i.e.: multiple readers can act
- at the same time but no writer can act until no readers are acting *)
- method private doReader: 'a lazy_t -> 'a
-
- (** execute 'action' acting as a 'writer' i.e.: when a writer is acting,
- no readers or writer can act, beware that writers can starve *)
- method private doWriter: 'a lazy_t -> 'a
-
- end
-