From d3c72d6856cd185e5b3e9f2e8b928b78c7031ed1 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Mon, 9 Feb 2004 13:22:00 +0000 Subject: [PATCH] split into two major parts: - backend (ocaml API) - frontend (web service) --- helm/http_getter/.depend | 17 +- helm/http_getter/Makefile | 18 +- helm/http_getter/http_getter.ml | 643 ++++++++-------------- helm/http_getter/http_getter.mli | 52 ++ helm/http_getter/http_getter_cache.ml | 10 +- helm/http_getter/http_getter_cache.mli | 12 +- helm/http_getter/http_getter_common.ml | 18 +- helm/http_getter/http_getter_common.mli | 8 +- helm/http_getter/http_getter_const.ml | 4 +- helm/http_getter/http_getter_const.mli | 2 +- helm/http_getter/http_getter_debugger.ml | 34 +- helm/http_getter/http_getter_debugger.mli | 13 +- helm/http_getter/http_getter_env.ml | 68 ++- helm/http_getter/http_getter_env.mli | 8 +- helm/http_getter/http_getter_map.ml | 2 +- helm/http_getter/http_getter_map.mli | 2 +- helm/http_getter/http_getter_misc.ml | 73 +-- helm/http_getter/http_getter_misc.mli | 13 +- helm/http_getter/http_getter_types.ml | 45 +- helm/http_getter/main.ml | 292 ++++++++++ helm/http_getter/threadSafe.ml | 2 +- helm/http_getter/threadSafe.mli | 2 +- helm/http_getter/zack.ml | 475 ---------------- helm/http_getter/zack.mli | 149 ----- 24 files changed, 750 insertions(+), 1212 deletions(-) create mode 100644 helm/http_getter/http_getter.mli create mode 100644 helm/http_getter/main.ml delete mode 100644 helm/http_getter/zack.ml delete mode 100644 helm/http_getter/zack.mli diff --git a/helm/http_getter/.depend b/helm/http_getter/.depend index 9e048f6a9..987d8acf5 100644 --- a/helm/http_getter/.depend +++ b/helm/http_getter/.depend @@ -18,18 +18,23 @@ http_getter_env.cmx: http_getter_const.cmx http_getter_misc.cmx \ http_getter_types.cmx http_getter_env.cmi http_getter_map.cmo: threadSafe.cmi http_getter_map.cmi http_getter_map.cmx: threadSafe.cmx http_getter_map.cmi -http_getter_misc.cmo: http_getter_debugger.cmi zack.cmi http_getter_misc.cmi -http_getter_misc.cmx: http_getter_debugger.cmx zack.cmx http_getter_misc.cmi -http_getter.cmo: http_getter_cache.cmi http_getter_common.cmi \ +http_getter_misc.cmo: http_getter_debugger.cmi http_getter_misc.cmi +http_getter_misc.cmx: http_getter_debugger.cmx http_getter_misc.cmi +http_getter.cmo: http_getter.cmi http_getter_common.cmi http_getter_const.cmi \ + http_getter_env.cmi http_getter_map.cmi http_getter_types.cmo \ + http_getter.cmi +http_getter.cmx: http_getter.cmx http_getter_common.cmx http_getter_const.cmx \ + http_getter_env.cmx http_getter_map.cmx http_getter_types.cmx \ + http_getter.cmi +main.cmo: http_getter.cmi http_getter_cache.cmi http_getter_common.cmi \ http_getter_const.cmi http_getter_debugger.cmi http_getter_env.cmi \ http_getter_map.cmi http_getter_misc.cmi http_getter_types.cmo -http_getter.cmx: http_getter_cache.cmx http_getter_common.cmx \ +main.cmx: http_getter.cmx http_getter_cache.cmx http_getter_common.cmx \ http_getter_const.cmx http_getter_debugger.cmx http_getter_env.cmx \ http_getter_map.cmx http_getter_misc.cmx http_getter_types.cmx threadSafe.cmo: http_getter_debugger.cmi threadSafe.cmi threadSafe.cmx: http_getter_debugger.cmx threadSafe.cmi -zack.cmo: zack.cmi -zack.cmx: zack.cmi http_getter_cache.cmi: http_getter_types.cmo http_getter_common.cmi: http_getter_types.cmo http_getter_env.cmi: http_getter_types.cmo +http_getter.cmi: http_getter_map.cmi diff --git a/helm/http_getter/Makefile b/helm/http_getter/Makefile index 8ff636e31..13e939b06 100644 --- a/helm/http_getter/Makefile +++ b/helm/http_getter/Makefile @@ -1,11 +1,13 @@ -VERSION = 0.2.3 +VERSION = 0.3.0 NAME = http_getter DISTDIR = http-getter-$(VERSION) EXTRA_DIST = AUTHORS COPYING NEWS README BUGS DOCS = doc/http_getter.conf.xml.sample -REQUIRES = http dbm pcre pxp shell threads zip +REQUIRES = \ + http dbm pcre pxp shell threads zip \ + helm-logger COMMONOPTS = -package "$(REQUIRES)" -pp camlp4o OCAMLFIND = ocamlfind OCAMLC = $(OCAMLFIND) ocamlc -thread $(COMMONOPTS) @@ -21,10 +23,10 @@ OCAMLDOC = \ $(shell $(OCAMLFIND) query -i-format threads) \ $(shell $(OCAMLFIND) query -i-format zip) MODULES = \ - http_getter_debugger threadSafe \ - http_getter_types zack http_getter_misc http_getter_const \ - http_getter_env http_getter_common http_getter_map \ - http_getter_cache + http_getter_debugger threadSafe \ + http_getter_types http_getter_misc http_getter_const \ + http_getter_env http_getter_common http_getter_map \ + http_getter_cache http_getter OBJS = $(patsubst %,%.cmo,$(MODULES)) OBJSOPT = $(patsubst %,%.cmx,$(MODULES)) @@ -49,9 +51,9 @@ $(NAME).cmo: $(NAME).ml $(OCAMLC) -c $< $(NAME).cmx: $(NAME).ml $(OCAMLOPT) -c $< -$(NAME): $(OBJS) $(NAME).ml +$(NAME): $(OBJS) main.ml $(OCAMLC) -linkpkg -thread -o $@ $^ -$(NAME).opt: $(OBJSOPT) $(NAME).ml +$(NAME).opt: $(OBJSOPT) main.ml $(OCAMLOPT) -linkpkg -thread -o $@ $^ http_getter.dot: *.ml *.mli diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml index ec6564249..32c4882df 100644 --- a/helm/http_getter/http_getter.ml +++ b/helm/http_getter/http_getter.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -26,255 +26,40 @@ * http://helm.cs.unibo.it/ *) -open Http_getter_common;; -open Http_getter_misc;; -open Http_getter_types;; -open Http_getter_debugger;; -open Printf;; +open Printf - (* constants *) +open Http_getter_common +open Http_getter_misc +open Http_getter_debugger +open Http_getter_types -let common_headers = [ - "Cache-Control", "no-cache"; - "Pragma", "no-cache"; - "Expires", "0" -] - - (* HTTP queries argument parsing *) - - (* parse encoding ("format" parameter), default is Enc_normal *) -let parse_enc (req: Http_types.request) = - try - (match req#param "format" with - | "normal" -> Enc_normal - | "gz" -> Enc_gzipped - | s -> raise (Http_getter_bad_request ("Invalid format: " ^ s))) - with Http_types.Param_not_found _ -> Enc_normal -;; - (* parse "patch_dtd" parameter, default is true *) -let parse_patch (req: Http_types.request) = - try - (match req#param "patch_dtd" with - | s when String.lowercase s = "yes" -> true - | s when String.lowercase s = "no" -> false - | s -> raise (Http_getter_bad_request ("Invalid patch_dtd value: " ^ s))) - with Http_types.Param_not_found _ -> true -;; - (* parse output format ("format" parameter), no default value *) -let parse_output_format (req: Http_types.request) = - match req#param "format" with - | s when String.lowercase s = "txt" -> Fmt_text - | s when String.lowercase s = "xml" -> Fmt_xml - | s -> raise (Http_getter_bad_request ("Invalid /ls format: " ^ s)) -;; - (* parse "baseuri" format for /ls method, no default value *) -let parse_ls_uri = - let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in - let trailing_slash_RE = Pcre.regexp "/+$" in - let wrong_uri uri = - raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ uri)) - in - fun (req: Http_types.request) -> - let baseuri = req#param "baseuri" in - try - let subs = - Pcre.extract ~rex:parse_ls_RE - (Pcre.replace ~rex:trailing_slash_RE baseuri) - in - (match (subs.(1), subs.(2)) with - | "cic", uri -> Cic uri - | "theory", uri -> Theory uri - | _ -> wrong_uri baseuri) - with Not_found -> wrong_uri baseuri -;; +let (index_line_sep_RE, index_sep_RE, trailing_types_RE, + heading_cic_RE, heading_theory_RE, heading_nuprl_RE, + heading_rdf_cic_RE, heading_rdf_theory_RE) = + (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n", + Pcre.regexp "\\.types$", + Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:", + Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") (* global maps, shared by all threads *) -let cic_map = new Http_getter_map.map Http_getter_env.cic_dbm in -let nuprl_map = new Http_getter_map.map Http_getter_env.nuprl_dbm in -let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm in -let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm in +let cic_map = new Http_getter_map.map Http_getter_env.cic_dbm +let nuprl_map = new Http_getter_map.map Http_getter_env.nuprl_dbm +let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm +let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm -let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] in -let close_maps () = List.iter (fun m -> m#close) maps in -let clear_maps () = List.iter (fun m -> m#clear) maps in -let sync_maps () = List.iter (fun m -> m#sync) maps in +let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] +let close_maps () = List.iter (fun m -> m#close) maps +let clear_maps () = List.iter (fun m -> m#clear) maps +let sync_maps () = List.iter (fun m -> m#sync) maps let map_of_uri = function | uri when is_cic_uri uri -> cic_map | uri when is_nuprl_uri uri -> nuprl_map | uri when is_rdf_uri uri -> rdf_map | uri when is_xsl_uri uri -> xsl_map - | uri -> raise (Http_getter_unresolvable_URI uri) -in -let resolve uri = - try - (map_of_uri uri)#resolve uri - with Http_getter_map.Key_not_found _ -> - raise (Http_getter_unresolvable_URI uri) -in -let register uri = - (* Warning: this fail if uri is already registered *) - (map_of_uri uri)#add uri -in -let return_all_foo_uris map doctype filter outchan = - (** return all URIs contained in 'map' which satisfy predicate 'filter'; URIs - are written in an XMLish format ('doctype' is the XML doctype) onto 'outchan' - *) - Http_daemon.send_basic_headers ~code:200 outchan; - Http_daemon.send_header "Content-Type" "text/xml" outchan; - Http_daemon.send_headers common_headers outchan; - Http_daemon.send_CRLF outchan; - output_string - outchan - (sprintf -" - - -<%s> -" - doctype - Http_getter_env.my_own_url - doctype - doctype); - map#iter - (fun uri _ -> - if filter uri then - output_string outchan (sprintf "\t\n" uri)); - output_string outchan (sprintf "\n" doctype) -in -let return_all_xml_uris = return_all_foo_uris cic_map "alluris" in -let return_all_rdf_uris = return_all_foo_uris rdf_map "allrdfuris" in -let return_ls = - let (++) (oldann, oldtypes, oldbody, oldtree) - (newann, newtypes, newbody, newtree) = - ((if newann > oldann then newann else oldann), - (if newtypes > oldtypes then newtypes else oldtypes), - (if newbody > oldbody then newbody else oldbody), - (if newtree > oldtree then newtree else oldtree)) - in - let basepart_RE = - Pcre.regexp - "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" - in - let (types_RE, types_ann_RE, body_RE, body_ann_RE, - proof_tree_RE, proof_tree_ann_RE) = - (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", - Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", - Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$") - in - let (slash_RE, til_slash_RE, no_slashes_RE) = - (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") - in - fun lsuri fmt outchan -> - let pat = - "^" ^ - (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) - in - let (dir_RE, obj_RE) = - (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)")) - in - let dirs = ref StringSet.empty in - let objs = Hashtbl.create 17 in - let store_dir d = - dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs - in - let store_obj o = - let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in - let no_flags = false, No, No, No in - let oldflags = - try - Hashtbl.find objs basepart - with Not_found -> (* no ann, no types, no body, no proof tree *) - no_flags - in - let newflags = - match o with - | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) - | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) - | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) - | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) - | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) - | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) - | s -> no_flags - in - Hashtbl.replace objs basepart (oldflags ++ newflags) - in - cic_map#iter (* BLEARGH Dbm module lacks support for fold-like functions *) - (fun key _ -> - match key with - | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *) - let localpart = Pcre.replace ~rex:dir_RE uri in - if Pcre.pmatch ~rex:no_slashes_RE localpart then - store_obj localpart - else - store_dir localpart - | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *) - store_obj (Pcre.replace ~rex:til_slash_RE uri) - | uri -> () (* miss *)); - match fmt with - | Fmt_text -> - let body = - (List.fold_left - (fun s d -> sprintf "%sdir, %s\n" s d) "" - (StringSet.elements !dirs)) ^ - (Http_getter_misc.hashtbl_sorted_fold - (fun uri (annflag, typesflag, bodyflag, treeflag) cont -> - sprintf "%sobject, %s, <%s,%s,%s,%s>\n" - cont uri (if annflag then "YES" else "NO") - (string_of_ls_flag typesflag) - (string_of_ls_flag bodyflag) - (string_of_ls_flag treeflag)) - objs "") - in - Http_daemon.respond - ~headers:(("Content-Type", "text/plain") :: common_headers) - ~body outchan - | Fmt_xml -> - let body = - sprintf -" - + | uri -> raise (Unresolvable_URI uri) - -%s - -" - Http_getter_env.my_own_url - ("\n" ^ - (String.concat - "\n" - (List.map - (fun d -> "
" ^ d ^ "
") - (StringSet.elements !dirs))) ^ "\n" ^ - (Http_getter_misc.hashtbl_sorted_fold - (fun uri (annflag, typesflag, bodyflag, treeflag) cont -> - sprintf -"%s -\t -\t -\t -\t - -" - cont uri (if annflag then "YES" else "NO") - (string_of_ls_flag typesflag) - (string_of_ls_flag bodyflag) - (string_of_ls_flag treeflag)) - objs "")) - in - Http_daemon.respond - ~headers:(("Content-Type", "text/xml") :: common_headers) - ~body outchan -in -let (index_line_sep_RE, index_sep_RE, trailing_types_RE, - heading_cic_RE, heading_theory_RE, heading_nuprl_RE, - heading_rdf_cic_RE, heading_rdf_theory_RE) = - (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n", - Pcre.regexp "\\.types$", - Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:", - Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") -in let update_from_server logmsg server_url = (* use global maps *) debug_print ("Updating information from " ^ server_url); let xml_url_of_uri = function @@ -285,16 +70,16 @@ let update_from_server logmsg server_url = (* use global maps *) Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) -> Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri - | uri -> raise (Http_getter_invalid_URI uri) + | uri -> raise (Invalid_URI uri) in let rdf_url_of_uri = function (* TODO as above *) | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) -> Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) -> Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri - | uri -> raise (Http_getter_invalid_URI uri) + | uri -> raise (Invalid_URI uri) in - let log = ref (logmsg ^ "Processing server: " ^ server_url ^ "
\n") in + let log = ref (`T ("Processing server: " ^ server_url) :: logmsg) in let (xml_index, rdf_index, xsl_index) = (* TODO keeps index in memory, is better to keep them on temp files? *) (http_get (server_url ^ "/" ^ Http_getter_env.xml_index), @@ -305,12 +90,12 @@ let update_from_server logmsg server_url = (* use global maps *) debug_print (sprintf "Warning: useless server %s" server_url); (match xml_index with | Some xml_index -> - (log := !log ^ "Updating XML db ...
\n"; + (log := `T "Updating XML db ...
" :: !log; List.iter (function | l when is_blank_line l -> () (* skip blank and commented lines *) | l -> - try + (try (match Pcre.split ~rex:index_line_sep_RE l with | [uri; "gz"] -> assert (is_cic_uri uri || is_nuprl_uri uri) ; @@ -320,219 +105,225 @@ let update_from_server logmsg server_url = (* use global maps *) assert (is_cic_uri uri || is_nuprl_uri uri) ; (map_of_uri uri)#replace uri ((xml_url_of_uri uri) ^ ".xml") - | _ -> - log := !log ^ "Ignoring invalid line: '" ^ l ^ "'
\n") - with Http_getter_invalid_URI uri -> - log := !log ^ "Ignoring invalid XML URI: '" ^ uri ^ "'
\n") - (Pcre.split ~rex:index_sep_RE xml_index)) (* xml_index lines *) + | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log) + with Invalid_URI uri -> + log := `T ("Ignoring invalid XML URI: '" ^ l) :: !log)) + (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *) + log := `T "All done" :: !log) | None -> ()); (match rdf_index with | Some rdf_index -> - (log := !log ^ "Updating RDF db ...
\n"; + (log := `T "Updating RDF db ..." :: !log; List.iter (fun l -> try (match Pcre.split ~rex:index_line_sep_RE l with | [uri; "gz"] -> - rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml.gz") - | [uri] -> rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml") - | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") - with Http_getter_invalid_URI uri -> - log := !log ^ "Ignoring invalid RDF URI: " ^ uri ^ "
\n") - (Pcre.split ~rex:index_sep_RE rdf_index)) (* rdf_index lines *) + rdf_map#replace uri + ((rdf_url_of_uri uri) ^ ".xml.gz") + | [uri] -> + rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml") + | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log) + with Invalid_URI uri -> + log := `T ("Ignoring invalid RDF URI: '" ^ l) :: !log) + (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *) + log := `T "All done" :: !log) | None -> ()); (match xsl_index with | Some xsl_index -> - (log := !log ^ "Updating XSLT db ...
\n"; + (log := `T "Updating XSLT db ..." :: !log; List.iter (fun l -> xsl_map#replace l (server_url ^ "/" ^ l)) (Pcre.split ~rex:index_sep_RE xsl_index); - log := !log ^ "All done!
\n") + log := `T "All done" :: !log) | None -> ()); debug_print "done with this server"; !log -in + let update_from_all_servers () = (* use global maps *) clear_maps (); let log = List.fold_left update_from_server - "" (* initial logmsg: empty *) + [] (* initial logmsg: empty *) (* reverse order: 1st server is the most important one *) - (List.rev !Http_getter_env.servers) + (List.map snd (List.rev (Http_getter_env.servers ()))) in sync_maps (); - log -in + `Msg (`L (List.rev log)) - (* thread action *) +let update_from_one_server server_url = + let log = update_from_server [] server_url in + `Msg (`L (List.rev log)) -let callback (req: Http_types.request) outchan = - try - debug_print ("Connection from " ^ req#clientAddr); - debug_print ("Received request: " ^ req#path); - (match req#path with - | "/help" -> - return_html_raw - (Http_getter_const.usage_string (Http_getter_env.env_to_string ())) - outchan - | "/getxml" | "/getxslt" | "/getdtd" | "/resolve" | "/register" -> - (let uri = req#param "uri" in (* common parameter *) - match req#path with - | "/getxml" -> - let enc = parse_enc req in - let patch = parse_patch req in - Http_getter_cache.respond_xml - ~url:(resolve uri) ~uri ~enc ~patch outchan - | "/getxslt" -> - let patch = parse_patch req in - Http_getter_cache.respond_xsl ~url:(resolve uri) ~patch outchan - | "/getdtd" -> - let patch = parse_patch req in - Http_getter_cache.respond_dtd - ~patch ~url:(Http_getter_env.dtd_dir ^ "/" ^ uri) outchan - | "/resolve" -> - (try - return_xml_raw - (sprintf "\n" (resolve uri)) - outchan - with Http_getter_unresolvable_URI uri -> - return_xml_raw "\n" outchan) - | "/register" -> - let url = req#param "url" in - register uri url; - return_html_msg "Register done" outchan - | _ -> assert false) - | "/clean_cache" -> - Http_getter_cache.clean (); - return_html_msg "Done." outchan - | "/update" -> - Http_getter_env.reload (); (* reload servers list from servers file *) - let log = update_from_all_servers () in - return_html_msg log outchan - | "/list_servers" -> - return_html_raw - (sprintf "\n%s\n
" - (String.concat "\n" - (List.map - (let i = ref ~-1 in - fun s -> incr i; sprintf "%d%s" !i s) - !Http_getter_env.servers))) - outchan - | "/add_server" -> - let name = req#param "url" in - (try - let position = - try - let res = int_of_string (req#param "position") in - if res < 0 then - raise (Failure "int_of_string"); - res - with Failure "int_of_string" -> - raise (Http_getter_bad_request - (sprintf "position must be a non negative integer (%s given)" - (req#param "position"))) - in - if position = 0 then (* fallback to default value *) - raise (Http_types.Param_not_found "foo") - else if position > 0 then begin (* add server and update all *) - Http_getter_env.add_server ~position name; - let log = update_from_all_servers () in - return_html_msg - (sprintf "Added server %s in position %d)
\n%s" - name position log) - outchan - end else (* position < 0 *) (* error! *) - assert false (* already checked above *) - with Http_types.Param_not_found _ -> (* add as 1st server by default *) - Http_getter_env.add_server ~position:0 name; - let log = update_from_server (* quick update (new server only) *) - (sprintf "Added server %s in head position
\n" name) name - in - return_html_msg log outchan) - | "/remove_server" -> - let position = - try - let res = int_of_string (req#param "position") in - if res < 0 then - raise (Failure "int_of_string"); - res - with Failure "int_of_string" -> - raise (Http_getter_bad_request - (sprintf "position must be a non negative integer (%s given)" - (req#param "position"))) - in - let server_name = - try - List.nth !Http_getter_env.servers position - with Failure "nth" -> - raise (Http_getter_bad_request - (sprintf "no server with position %d" position)) - in - Http_getter_env.remove_server position; - let log = update_from_all_servers () in - return_html_msg - (sprintf "Removed server %s (position %d)
\n%s" - server_name position log) - outchan - | "/getalluris" -> - return_all_xml_uris - (fun uri -> - (Pcre.pmatch ~rex:heading_cic_RE uri) && - not (Pcre.pmatch ~rex:trailing_types_RE uri)) - outchan - | "/getallrdfuris" -> - (let classs = req#param "class" in - try - let filter = - let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in - match classs with - | ("forward" as c) | ("backward" as c) -> - (fun uri -> Pcre.pmatch ~pat:(base ^ c) uri) - | c -> raise (Http_getter_invalid_RDF_class c) - in - return_all_rdf_uris filter outchan - with Http_getter_invalid_RDF_class c -> - raise (Http_getter_bad_request ("Invalid RDF class: " ^ c))) - | "/ls" -> return_ls (parse_ls_uri req) (parse_output_format req) outchan - | "/getempty" -> - Http_daemon.respond ~body:Http_getter_const.empty_xml outchan - | invalid_request -> - Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan); - debug_print "Done!\n" - with - | Http_types.Param_not_found attr_name -> - return_400 (sprintf "Parameter '%s' is missing" attr_name) outchan - | Http_getter_bad_request msg -> return_html_error msg outchan - | Http_getter_internal_error msg -> return_html_internal_error msg outchan - | Shell.Subprocess_error l -> - return_html_internal_error - (String.concat "
\n" - (List.map - (fun (cmd, code) -> - sprintf "Command '%s' returned %s" - cmd (string_of_proc_status code)) - l)) - outchan - | exc -> - return_html_error - ("Uncaught exception: " ^ (Printexc.to_string exc)) - outchan -in +let temp_file_of_uri uri = + let flat_string s s' c = + let cs = String.copy s in + for i = 0 to (String.length s) - 1 do + if String.contains s' s.[i] then cs.[i] <- c + done; + cs + in + let user = try Unix.getlogin () with _ -> "" in + Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') "" + +(* API *) - (* daemon initialization *) +let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ()) -let main () = - print_string (Http_getter_env.env_to_string ()); - flush stdout; - Unix.putenv "http_proxy" ""; - at_exit close_maps; - Sys.catch_break true; +let resolve uri = try - Http_daemon.start' - ~timeout:(Some 600) ~port:Http_getter_env.port ~mode:`Thread callback - with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) -in + (map_of_uri uri)#resolve uri + with Http_getter_map.Key_not_found _ -> raise (Unresolvable_URI uri) + + (* Warning: this fail if uri is already registered *) +let register ~uri ~url = (map_of_uri uri)#add uri url + +let update () = update_from_all_servers () + +let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri = + let url = resolve uri in + let (fname, outchan) = temp_file_of_uri uri in + Http_getter_cache.respond_xml ~uri ~url ~enc:format ~patch:patch_dtd outchan; + close_out outchan; + fname + +let getxslt ?(patch_dtd = true) uri = + let url = resolve uri in + let (fname, outchan) = temp_file_of_uri uri in + Http_getter_cache.respond_xsl ~url ~patch:patch_dtd outchan; + close_out outchan; + fname + +let getdtd ?(patch_dtd = true) uri = + let url = Http_getter_env.dtd_dir ^ "/" ^ uri in + let (fname, outchan) = temp_file_of_uri uri in + Http_getter_cache.respond_dtd ~url ~patch:patch_dtd outchan; + close_out outchan; + fname + +let clean_cache () = Http_getter_cache.clean () + +let list_servers () = Http_getter_env.servers () + +let add_server ?(position = 0) name = + if position = 0 then begin + Http_getter_env.add_server ~position:0 name; + update_from_one_server name (* quick update (new server only) *) + end else if position > 0 then begin + Http_getter_env.add_server ~position name; + update () + end else (* already checked bt parse_position *) + assert false + +let remove_server position = + let server_name = + try + List.assoc position (Http_getter_env.servers ()) + with Not_found -> + raise (Invalid_argument (sprintf "no server with position %d" position)) + in + Http_getter_env.remove_server position; + update () + +let return_uris map filter = + let uris = ref [] in + map#iter (fun uri _ -> if filter uri then uris := uri :: !uris); + List.rev !uris + +let getalluris () = + let filter uri = + (Pcre.pmatch ~rex:heading_cic_RE uri) && + not (Pcre.pmatch ~rex:trailing_types_RE uri) + in + return_uris cic_map filter -main () +let getallrdfuris classs = + let filter = + let base = "^helm:rdf:www\\.cs\\.unibo\\.it/helm/rdf/" in + match classs with + | `Forward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "forward") uri) + | `Backward -> (fun uri -> Pcre.pmatch ~pat:(base ^ "backward") uri) + in + return_uris rdf_map filter + +let ls = + let (++) (oldann, oldtypes, oldbody, oldtree) + (newann, newtypes, newbody, newtree) = + ((if newann > oldann then newann else oldann), + (if newtypes > oldtypes then newtypes else oldtypes), + (if newbody > oldbody then newbody else oldbody), + (if newtree > oldtree then newtree else oldtree)) + in + let basepart_RE = + Pcre.regexp + "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" + in + let (types_RE, types_ann_RE, body_RE, body_ann_RE, + proof_tree_RE, proof_tree_ann_RE) = + (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", + Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", + Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$") + in + let (slash_RE, til_slash_RE, no_slashes_RE) = + (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") + in + fun lsuri -> + let pat = + "^" ^ + (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) + in + let (dir_RE, obj_RE) = + (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)")) + in + let dirs = ref StringSet.empty in + let objs = Hashtbl.create 17 in + let store_dir d = + dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs + in + let store_obj o = + let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in + let no_flags = false, No, No, No in + let oldflags = + try + Hashtbl.find objs basepart + with Not_found -> (* no ann, no types, no body, no proof tree *) + no_flags + in + let newflags = + match o with + | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) + | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) + | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) + | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) + | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) + | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) + | s -> no_flags + in + Hashtbl.replace objs basepart (oldflags ++ newflags) + in + cic_map#iter + (* BLEARGH Dbm module lacks support for fold-like functions *) + (fun key _ -> + match key with + | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *) + let localpart = Pcre.replace ~rex:dir_RE uri in + if Pcre.pmatch ~rex:no_slashes_RE localpart then + store_obj localpart + else + store_dir localpart + | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *) + store_obj (Pcre.replace ~rex:til_slash_RE uri) + | uri -> () (* miss *)); + let ls_items = ref [] in + StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; + Http_getter_misc.hashtbl_sorted_iter + (fun uri (annflag, typesflag, bodyflag, treeflag) -> + ls_items := + Ls_object { + uri = uri; ann = annflag; + types = typesflag; body = typesflag; proof_tree = treeflag + } :: !ls_items) + objs; + List.rev !ls_items diff --git a/helm/http_getter/http_getter.mli b/helm/http_getter/http_getter.mli new file mode 100644 index 000000000..a559917b9 --- /dev/null +++ b/helm/http_getter/http_getter.mli @@ -0,0 +1,52 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Http_getter_types + + (** {2 Getter Web Service interface as API *) + +val help: unit -> string +val resolve: string -> string (* uri -> url *) +val register: uri:string -> url:string -> unit +val update: unit -> Ui_logger.html_msg +val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string +val getxslt : ?patch_dtd:bool -> string -> string +val getdtd : ?patch_dtd:bool -> string -> string +val clean_cache: unit -> unit +val list_servers: unit -> (int * string) list +val add_server: ?position:int -> string -> Ui_logger.html_msg +val remove_server: int -> Ui_logger.html_msg +val getalluris: unit -> string list +val getallrdfuris: [ `Forward | `Backward ] -> string list +val ls: xml_uri -> ls_item list + + (** {2 Misc} *) + +val close_maps: unit -> unit +val update_from_one_server: string -> Ui_logger.html_msg + diff --git a/helm/http_getter/http_getter_cache.ml b/helm/http_getter/http_getter_cache.ml index b77536f0c..8f5bc2312 100644 --- a/helm/http_getter/http_getter_cache.ml +++ b/helm/http_getter/http_getter_cache.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -52,7 +52,7 @@ let threadSafe = new threadSafe ;; let resource_type_of_url = function | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal - | url -> raise (Http_getter_invalid_URL url) + | url -> raise (Invalid_URL url) let extension_of_resource_type = function | Enc_normal -> "xml" | Enc_gzipped -> "xml.gz" @@ -68,7 +68,7 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = let resource_type = resource_type_of_url url in let extension = extension_of_resource_type resource_type in let downloadname = - match http_getter_uri_of_string uri with (* parse uri *) + match uri_of_string uri with (* parse uri *) | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) -> (* assumption: baseuri starts with "/" *) sprintf "%s%s.%s" Http_getter_env.cic_dir baseuri extension @@ -93,8 +93,8 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in let contype = "text/xml" in - (* File cache if needed and return a short circuit file. - Short circuit is needed in situation like: + (* Fill cache if needed and return a short circuit file. + Short circuit is needed in situations like: resource type = normal, cache type = gzipped, required encoding = normal we would like to avoid normal -> gzipped -> normal conversions. To avoid this tmp_short_circuit is used to remember the name of the intermediate diff --git a/helm/http_getter/http_getter_cache.mli b/helm/http_getter/http_getter_cache.mli index 9aa6e53a5..11211288a 100644 --- a/helm/http_getter/http_getter_cache.mli +++ b/helm/http_getter/http_getter_cache.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -29,18 +29,16 @@ open Http_getter_types;; val respond_xml: - ?enc:http_getter_encoding -> ?patch:bool -> url:string -> uri:string -> - out_channel -> + ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> out_channel -> unit val respond_xsl: - ?enc:http_getter_encoding -> ?patch:bool -> url:string -> - out_channel -> + ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> unit val respond_dtd: - ?enc:http_getter_encoding -> ?patch:bool -> url:string -> - out_channel -> + ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> unit val clean: unit -> unit + diff --git a/helm/http_getter/http_getter_common.ml b/helm/http_getter/http_getter_common.ml index ad549330b..6087a467b 100644 --- a/helm/http_getter/http_getter_common.ml +++ b/helm/http_getter/http_getter_common.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -41,22 +41,22 @@ let is_nuprl_uri uri = Pcre.pmatch ~pat:"^nuprl:" uri let is_rdf_uri uri = Pcre.pmatch ~pat:"^helm:rdf(.*):(.*)//(.*)" uri let is_xsl_uri uri = Pcre.pmatch ~pat:"^\\w+\\.xsl" uri -let rec http_getter_uri_of_string = function +let rec uri_of_string = function | uri when is_rdf_uri uri -> (match Pcre.split ~pat:"//" uri with | [ prefix; uri ] -> let rest = - match http_getter_uri_of_string uri with + match uri_of_string uri with | Cic_uri xmluri -> xmluri - | _ -> raise (Http_getter_invalid_URI uri) + | _ -> raise (Invalid_URI uri) in Rdf_uri (prefix, rest) - | _ -> raise (Http_getter_invalid_URI uri)) + | _ -> raise (Invalid_URI uri)) | uri when is_cic_uri uri -> Cic_uri (Cic (Pcre.replace ~pat:"^cic:" uri)) | uri when is_nuprl_uri uri -> Nuprl_uri (Pcre.replace ~pat:"^nuprl:" uri) | uri when is_theory_uri uri -> Cic_uri (Theory (Pcre.replace ~pat:"^theory:" uri)) - | uri -> raise (Http_getter_invalid_URI uri) + | uri -> raise (Invalid_URI uri) let patch_xml line = Pcre.replace @@ -84,10 +84,10 @@ let patch_dtd line = line let pp_error s = - sprintf "

Http Getter error: %s

" s + sprintf "Http Getter error: %s" s let pp_internal_error s = - sprintf "

Http Getter Internal error: %s

" s -let pp_msg s = sprintf "

%s

" s + sprintf "Http Getter Internal error: %s" s +let pp_msg s = sprintf "%s" s let null_pp s = s let mk_return_fun pp_fun contype msg outchan = diff --git a/helm/http_getter/http_getter_common.mli b/helm/http_getter/http_getter_common.mli index 236644452..f4ecb3dc8 100644 --- a/helm/http_getter/http_getter_common.mli +++ b/helm/http_getter/http_getter_common.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -28,15 +28,15 @@ open Http_getter_types;; -val string_of_ls_flag: http_getter_ls_flag -> string -val string_of_encoding: http_getter_encoding -> string +val string_of_ls_flag: ls_flag -> string +val string_of_encoding: encoding -> string val is_cic_uri: string -> bool val is_nuprl_uri: string -> bool val is_rdf_uri: string -> bool val is_xsl_uri: string -> bool -val http_getter_uri_of_string: string -> http_getter_uri +val uri_of_string: string -> uri val patch_xml : string -> string val patch_xsl : string -> string diff --git a/helm/http_getter/http_getter_const.ml b/helm/http_getter/http_getter_const.ml index eefd7c865..a4eac83e5 100644 --- a/helm/http_getter/http_getter_const.ml +++ b/helm/http_getter/http_getter_const.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -28,7 +28,7 @@ open Printf;; -let version = "0.2.1" +let version = "0.3.0" let conffile = "http_getter.conf.xml" (* TODO provide a better usage string *) diff --git a/helm/http_getter/http_getter_const.mli b/helm/http_getter/http_getter_const.mli index e50a469cb..894ccd645 100644 --- a/helm/http_getter/http_getter_const.mli +++ b/helm/http_getter/http_getter_const.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * diff --git a/helm/http_getter/http_getter_debugger.ml b/helm/http_getter/http_getter_debugger.ml index b6d1e5042..3f9afd78c 100644 --- a/helm/http_getter/http_getter_debugger.ml +++ b/helm/http_getter/http_getter_debugger.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -26,7 +26,33 @@ * http://helm.cs.unibo.it/ *) - (* debugging settings *) -let debug = true;; -let debug_print s = if debug then prerr_endline ("[HTTP-Getter] " ^ s);; +let debug = ref true + +(* invariant: if logfile is set, then logchan is set too *) +let logfile = ref None +let logchan = ref None + +let set_logfile f = + (match !logchan with None -> () | Some oc -> close_out oc); + match f with + | Some f -> + logfile := Some f; + logchan := Some (open_out f) + | None -> + logfile := None; + logchan := None + +let get_logfile () = !logfile + +let close_logfile () = set_logfile None + +let debug_print s = + let msg = "[HTTP-Getter] " ^ s in + if !debug then + match (!logfile, !logchan) with + | None, _ -> prerr_endline msg + | Some fname, Some oc -> + output_string oc msg; + flush oc + | Some _, None -> assert false diff --git a/helm/http_getter/http_getter_debugger.mli b/helm/http_getter/http_getter_debugger.mli index cd9da6732..461e2a1a7 100644 --- a/helm/http_getter/http_getter_debugger.mli +++ b/helm/http_getter/http_getter_debugger.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -26,6 +26,15 @@ * http://helm.cs.unibo.it/ *) -val debug: bool + (** enable/disable debugging messages *) +val debug: bool ref + + (** output a debugging message *) val debug_print: string -> unit + (** if set to Some fname, fname will be used as a logfile, otherwise stderr + * will be used *) +val get_logfile: unit -> string option +val set_logfile: string option -> unit +val close_logfile: unit -> unit + diff --git a/helm/http_getter/http_getter_env.ml b/helm/http_getter/http_getter_env.ml index 542692795..a7ab80f24 100644 --- a/helm/http_getter/http_getter_env.ml +++ b/helm/http_getter/http_getter_env.ml @@ -1,9 +1,10 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * - * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or @@ -25,11 +26,12 @@ * http://helm.cs.unibo.it/ *) -open Http_getter_types;; -open Printf;; -open Pxp_document;; -open Pxp_types;; -open Pxp_yacc;; +open Printf +open Pxp_document +open Pxp_types +open Pxp_yacc + +open Http_getter_types let version = Http_getter_const.version @@ -77,18 +79,25 @@ let safe_getenv ?(from = Both) var = let servers_file = safe_getenv "HTTP_GETTER_SERVERS_FILE" - (* TODO BUG HERE: is commented lines are included in the servers file the - server index (used for example by the remove_server method) gets out of sync! - *) -let parse_servers () = +let load_servers () = + let pos = ref (-1) in List.rev (Http_getter_misc.fold_file - (fun servers line -> - if Http_getter_misc.is_blank_line line then servers else line::servers) + (fun line servers -> + if Http_getter_misc.is_blank_line line then + servers + else + (incr pos; (!pos, line) :: servers)) [] servers_file) -;; -let servers = ref (parse_servers ()) -let reload_servers () = servers := parse_servers () + +let _servers = ref (load_servers ()) +let servers () = !_servers + +let save_servers () = + let oc = open_out servers_file in + List.iter (fun (_,server) -> output_string oc (server ^ "\n")) (servers ()); + close_out oc +let reload_servers () = _servers := load_servers () let cic_dbm = safe_getenv "HTTP_GETTER_CIC_DBM" let nuprl_dbm = safe_getenv "HTTP_GETTER_NUPRL_DBM" @@ -125,8 +134,7 @@ let cache_mode = | "gz" -> Enc_gzipped | mode -> failwith ("Invalid cache mode: " ^ mode) -let reload () = - reload_servers () +let reload () = reload_servers () let env_to_string () = sprintf @@ -159,19 +167,27 @@ servers: dtd_base_url (match cache_mode with Enc_normal -> "Normal" | Enc_gzipped -> "GZipped") conf_file conf_dir - (String.concat "\n\t" (* servers list prepended with server number *) - (List.map - (let idx = ref ~-1 in - fun server -> incr idx; sprintf "%3d: %s" !idx server) - !servers)) + (String.concat "\n\t" (* (position * server) list *) + (List.map (fun (pos, server) -> sprintf "%3d: %s" pos server) + (servers ()))) let add_server ?position url = (match position with - | Some p -> Http_getter_misc.add_line ~fname:servers_file ~position:p url - | None -> Http_getter_misc.add_line ~fname:servers_file url); + | None -> + _servers := !_servers @ [-1, url]; + | Some p when p > 0 -> + let rec add_after pos = function + | [] -> [-1, url] + | hd :: tl when p = 1 -> hd :: (-1, url) :: tl + | hd :: tl (* when p > 1 *) -> hd :: (add_after (pos - 1) tl) + in + _servers := add_after p !_servers + | Some _ -> assert false); + save_servers (); reload_servers () let remove_server position = - Http_getter_misc.remove_line ~fname:servers_file position; + _servers := List.remove_assoc position !_servers; + save_servers (); reload_servers () diff --git a/helm/http_getter/http_getter_env.mli b/helm/http_getter/http_getter_env.mli index 6d4d6312f..fe660d849 100644 --- a/helm/http_getter/http_getter_env.mli +++ b/helm/http_getter/http_getter_env.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -53,9 +53,9 @@ val dtd_base_url : string (* base URL for DTD downloading *) val host : string (* host on which getter listens *) val my_own_url : string (* URL at which contact getter *) -val servers : string list ref (* servers list. DO NOT CHANGE this list, - modifications wont be preserved *) -val cache_mode : http_getter_encoding (* cached files encoding *) +val servers : unit -> (int * string) list + (* (position * server) list *) +val cache_mode : encoding (* cached files encoding *) val conf_file : string (* configuration file's full path *) val conf_dir : string (* directory where conf_file resides *) diff --git a/helm/http_getter/http_getter_map.ml b/helm/http_getter/http_getter_map.ml index b7ac1c605..57ec92736 100644 --- a/helm/http_getter/http_getter_map.ml +++ b/helm/http_getter/http_getter_map.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * diff --git a/helm/http_getter/http_getter_map.mli b/helm/http_getter/http_getter_map.mli index 720484f31..7081f1962 100644 --- a/helm/http_getter/http_getter_map.mli +++ b/helm/http_getter/http_getter_map.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * diff --git a/helm/http_getter/http_getter_misc.ml b/helm/http_getter/http_getter_misc.ml index ad543b447..c983c2988 100644 --- a/helm/http_getter/http_getter_misc.ml +++ b/helm/http_getter/http_getter_misc.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -26,8 +26,9 @@ * http://helm.cs.unibo.it/ *) -open Http_getter_debugger;; -open Printf;; +open Printf + +open Http_getter_debugger let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *) let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$" @@ -40,16 +41,18 @@ let bufsiz = 16384 (* for file system I/O *) let tcp_bufsiz = 4096 (* for TCP I/O *) let fold_file f init fname = - let inchan = open_in fname in - let res = - try - Zack.fold_in f init inchan - with e -> close_in inchan; raise e + let ic = open_in fname in + let rec aux acc = + let line = try Some (input_line ic) with End_of_file -> None in + match line with + | None -> acc + | Some line -> aux (f line acc) in - close_in inchan; + let res = try aux init with e -> close_in ic; raise e in + close_in ic; res -let iter_file f = fold_file (fun _ line -> f line) () +let iter_file f = fold_file (fun line _ -> f line) () let hashtbl_sorted_fold f tbl init = let sorted_keys = @@ -57,6 +60,12 @@ let hashtbl_sorted_fold f tbl init = in List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys +let hashtbl_sorted_iter f tbl = + let sorted_keys = + List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) + in + List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys + let cp src dst = let (ic, oc) = (open_in src, open_out dst) in let buf = String.create bufsiz in @@ -234,52 +243,8 @@ let http_get url = url (Printexc.to_string e)); None - (** apply a transformation "string list -> string list" to file lines *) -let mangle_file ~fname f = - let ic = open_in fname in - let lines = Zack.input_lines ic in - close_in ic; - let oc = open_out fname in - Zack.output_lines (f lines) oc; - close_out oc -;; - -let add_line ~fname ?position line = - mangle_file ~fname - (fun lines -> - match position with - | None -> lines @ [line] - | Some i -> - assert (i >= 0); - let rec add_after i = function - | (acc, []) -> acc @ [line] (* eof *) - | (acc, ((hd::tl) as l)) -> - if i = 0 then - acc @ [line] @ l - else - add_after (i-1) (acc @ [hd], tl) - in - add_after i ([], lines)) -;; - -let remove_line ~fname position = - mangle_file ~fname - (fun lines -> - assert (position >= 0); - let rec remove i = function - | (acc, []) -> acc (* eof *) - | (acc, ((hd::tl) as l)) -> - if i = 0 then - acc @ tl - else - remove (i-1) (acc @ [hd], tl) - in - remove position ([], lines)) -;; - let is_blank_line = let blank_line_RE = Pcre.regexp "(^#)|(^\\s*$)" in fun line -> Pcre.pmatch ~rex:blank_line_RE line -;; diff --git a/helm/http_getter/http_getter_misc.mli b/helm/http_getter/http_getter_misc.mli index 0551161b9..b328742be 100644 --- a/helm/http_getter/http_getter_misc.mli +++ b/helm/http_getter/http_getter_misc.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -32,7 +32,7 @@ exception Mkdir_failure of string * string (** "fold_left" like function on file lines, trailing newline is not passed to the given function *) -val fold_file : ('a -> string -> 'a) -> 'a -> string -> 'a +val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a (* "iter" like function on file lines, trailing newline is not passed to the given function *) val iter_file : (string -> unit) -> string -> unit @@ -40,6 +40,8 @@ val iter_file : (string -> unit) -> string -> unit (** like Hashtbl.fold but keys are processed ordered *) val hashtbl_sorted_fold : ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c + (** like Hashtbl.iter but keys are processed ordered *) +val hashtbl_sorted_iter : ('a -> 'b -> unit) -> ('a, 'b) Hashtbl.t -> unit (** cp frontend *) val cp: string -> string -> unit @@ -73,13 +75,6 @@ val http_get: string -> string option remote resources fetched via HTTP GET requests *) val http_get_iter_buf: callback:(string -> unit) -> string -> unit - (** add a line to a file (specified by name) _after_ a given line (defaults to - last line). *) -val add_line: fname:string -> ?position:int -> string -> unit - (** remove a line, if any, from a file specified by line number (0 based, i.e. - first line of file is line 0) *) -val remove_line: fname:string -> int -> unit - (** true on blanks-only and #-commented lines, false otherwise *) val is_blank_line: string -> bool diff --git a/helm/http_getter/http_getter_types.ml b/helm/http_getter/http_getter_types.ml index 01af4faec..bf584f6ce 100644 --- a/helm/http_getter/http_getter_types.ml +++ b/helm/http_getter/http_getter_types.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * @@ -26,26 +26,37 @@ * http://helm.cs.unibo.it/ *) -exception Http_getter_bad_request of string -exception Http_getter_unresolvable_URI of string -exception Http_getter_invalid_URI of string -exception Http_getter_invalid_URL of string -exception Http_getter_invalid_RDF_class of string -exception Http_getter_internal_error of string +exception Bad_request of string +exception Unresolvable_URI of string +exception Invalid_URI of string +exception Invalid_URL of string +exception Invalid_RDF_class of string +exception Internal_error of string -type http_getter_encoding = Enc_normal | Enc_gzipped -type http_getter_answer_format = Fmt_text | Fmt_xml -type http_getter_ls_flag = No | Yes | Ann +type encoding = Enc_normal | Enc_gzipped +type answer_format = Fmt_text | Fmt_xml +type ls_flag = Yes | No | Ann +type ls_object = + { + uri: string; + ann: bool; + types: ls_flag; + body: ls_flag; + proof_tree: ls_flag; + } +type ls_item = + | Ls_section of string + | Ls_object of ls_object -type http_getter_xml_uri = +type xml_uri = | Cic of string | Theory of string -type http_getter_rdf_uri = string * http_getter_xml_uri -type http_getter_nuprl_uri = string -type http_getter_uri = - | Cic_uri of http_getter_xml_uri - | Nuprl_uri of http_getter_nuprl_uri - | Rdf_uri of http_getter_rdf_uri +type rdf_uri = string * xml_uri +type nuprl_uri = string +type uri = + | Cic_uri of xml_uri + | Nuprl_uri of nuprl_uri + | Rdf_uri of rdf_uri module StringSet = Set.Make (String) diff --git a/helm/http_getter/main.ml b/helm/http_getter/main.ml new file mode 100644 index 000000000..e6b9f6e02 --- /dev/null +++ b/helm/http_getter/main.ml @@ -0,0 +1,292 @@ +(* + * Copyright (C) 2003-2004: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Printf + +open Http_getter_common +open Http_getter_misc +open Http_getter_types +open Http_getter_debugger + + (* constants *) + +let common_headers = [ + "Cache-Control", "no-cache"; + "Pragma", "no-cache"; + "Expires", "0" +] + + (* HTTP queries argument parsing *) + + (* parse encoding ("format" parameter), default is Enc_normal *) +let parse_enc (req: Http_types.request) = + try + (match req#param "format" with + | "normal" -> Enc_normal + | "gz" -> Enc_gzipped + | s -> raise (Bad_request ("Invalid format: " ^ s))) + with Http_types.Param_not_found _ -> Enc_normal + + (* parse "patch_dtd" parameter, default is true *) +let parse_patch (req: Http_types.request) = + try + (match req#param "patch_dtd" with + | s when String.lowercase s = "yes" -> true + | s when String.lowercase s = "no" -> false + | s -> raise (Bad_request ("Invalid patch_dtd value: " ^ s))) + with Http_types.Param_not_found _ -> true + + (* parse output format ("format" parameter), no default value *) +let parse_output_format (req: Http_types.request) = + match req#param "format" with + | s when String.lowercase s = "txt" -> Fmt_text + | s when String.lowercase s = "xml" -> Fmt_xml + | s -> raise (Bad_request ("Invalid /ls format: " ^ s)) + + (* parse "baseuri" format for /ls method, no default value *) +let parse_ls_uri = + let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in + let trailing_slash_RE = Pcre.regexp "/+$" in + let wrong_uri uri = + raise (Bad_request ("Invalid /ls baseuri: " ^ uri)) + in + fun (req: Http_types.request) -> + let baseuri = req#param "baseuri" in + try + let subs = + Pcre.extract ~rex:parse_ls_RE + (Pcre.replace ~rex:trailing_slash_RE baseuri) + in + (match (subs.(1), subs.(2)) with + | "cic", uri -> Cic uri + | "theory", uri -> Theory uri + | _ -> wrong_uri baseuri) + with Not_found -> wrong_uri baseuri + + (* parse "position" argument, default is 0 *) +let parse_position (req: Http_types.request) = + try + let res = int_of_string (req#param "position") in + if res < 0 then + raise (Failure "int_of_string"); + res + with + | Http_types.Param_not_found _ -> 0 + | Failure "int_of_string" -> + raise (Bad_request + (sprintf "position must be a non negative integer (%s given)" + (req#param "position"))) + +let parse_rdf_class (req: Http_types.request) = + match req#param "class" with + | "forward" -> `Forward + | "backward" -> `Backward + | c -> raise (Bad_request ("Invalid RDF class: " ^ c)) + +let return_all_foo_uris doctype uris outchan = + Http_daemon.send_basic_headers ~code:200 outchan; + Http_daemon.send_header "Content-Type" "text/xml" outchan; + Http_daemon.send_headers common_headers outchan; + Http_daemon.send_CRLF outchan; + output_string + outchan + (sprintf +" + + +<%s> +" + doctype + Http_getter_env.my_own_url + doctype + doctype); + List.iter + (fun uri -> output_string outchan (sprintf "\t\n" uri)) + uris; + output_string outchan (sprintf "\n" doctype) + +let return_all_xml_uris outchan = + return_all_foo_uris "alluris" (Http_getter.getalluris ()) outchan +let return_all_rdf_uris classs outchan = + return_all_foo_uris "allrdfuris" (Http_getter.getallrdfuris classs) outchan + +let return_ls xmluri fmt outchan = + let ls_items = Http_getter.ls xmluri in + let buf = Buffer.create 10240 in + (match fmt with + | Fmt_text -> + List.iter + (function + | Ls_section dir -> bprintf buf "dir, %s\n" dir + | Ls_object obj -> + bprintf buf "object, %s, <%s,%s,%s,%s>\n" + obj.uri (if obj.ann then "YES" else "NO") + (string_of_ls_flag obj.types) + (string_of_ls_flag obj.body) + (string_of_ls_flag obj.proof_tree)) + ls_items + | Fmt_xml -> + Buffer.add_string buf "\n"; + bprintf buf "\n" + Http_getter_env.my_own_url; + Buffer.add_string buf "\n"; + List.iter + (function + | Ls_section dir -> bprintf buf "
%s
\n" dir + | Ls_object obj -> + bprintf buf +" +\t +\t +\t +\t + +" + obj.uri (if obj.ann then "YES" else "NO") + (string_of_ls_flag obj.types) + (string_of_ls_flag obj.body) + (string_of_ls_flag obj.proof_tree)) + ls_items; + Buffer.add_string buf "
\n"); + let body = Buffer.contents buf in + Http_daemon.respond + ~headers:(("Content-Type", "text/plain") :: common_headers) + ~body outchan + +let return_help outchan = return_html_raw (Http_getter.help ()) outchan + +let return_resolve uri outchan = + try + return_xml_raw + (sprintf "\n" (Http_getter.resolve uri)) + outchan + with Unresolvable_URI uri -> + return_xml_raw "\n" outchan + +let return_list_servers outchan = + return_html_raw + (sprintf "\n%s\n
" + (String.concat "\n" + (List.map + (fun (pos, server) -> + sprintf "%d%s" pos server) + (Http_getter.list_servers ())))) + outchan + + (* thread action *) + +let callback (req: Http_types.request) outchan = + try + debug_print ("Connection from " ^ req#clientAddr); + debug_print ("Received request: " ^ req#path); + (match req#path with + | "/help" -> return_help outchan + | "/getxml" -> + let uri = req#param "uri" in + Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri + ~enc:(parse_enc req) ~patch:(parse_patch req) outchan + | "/getxslt" -> + Http_getter_cache.respond_xsl + ~url:(Http_getter.resolve (req#param "uri")) + ~patch:(parse_patch req) outchan + | "/getdtd" -> + Http_getter_cache.respond_dtd ~patch:(parse_patch req) + ~url:(Http_getter_env.dtd_dir ^ "/" ^ (req#param "uri")) outchan + | "/resolve" -> return_resolve (req#param "uri") outchan + | "/register" -> + Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url"); + return_html_msg "Register done" outchan + | "/clean_cache" -> + Http_getter.clean_cache (); + return_html_msg "Done." outchan + | "/update" -> + Http_getter_env.reload (); (* reload servers list from servers file *) + let log = Http_getter.update () in + return_html_msg (Ui_logger.html_of_html_msg log) outchan + | "/list_servers" -> return_list_servers outchan + | "/add_server" -> + let name = req#param "url" in + let position = parse_position req in + let log = Http_getter.add_server ~position name in + return_html_msg + (sprintf "Added server %s in position %d)
\n%s" + name position (Ui_logger.html_of_html_msg log)) + outchan + | "/remove_server" -> + let position = parse_position req in + let log = + try + Http_getter.remove_server position + with Invalid_argument _ -> + raise (Bad_request (sprintf "no server with position %d" position)) + in + return_html_msg + (sprintf "Removed server at position %d
\n%s" + position (Ui_logger.html_of_html_msg log)) + outchan + | "/getalluris" -> return_all_xml_uris outchan + | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan + | "/ls" -> return_ls (parse_ls_uri req) (parse_output_format req) outchan + | "/getempty" -> + Http_daemon.respond ~body:Http_getter_const.empty_xml outchan + | invalid_request -> + Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan); + debug_print "Done!\n" + with + | Http_types.Param_not_found attr_name -> + return_400 (sprintf "Parameter '%s' is missing" attr_name) outchan + | Bad_request msg -> return_html_error msg outchan + | Internal_error msg -> return_html_internal_error msg outchan + | Shell.Subprocess_error l -> + return_html_internal_error + (String.concat "
\n" + (List.map + (fun (cmd, code) -> + sprintf "Command '%s' returned %s" + cmd (string_of_proc_status code)) + l)) + outchan + | exc -> + return_html_error + ("Uncaught exception: " ^ (Printexc.to_string exc)) + outchan + + (* Main *) + +let main () = + print_string (Http_getter_env.env_to_string ()); + flush stdout; + at_exit Http_getter.close_maps; + Sys.catch_break true; + try + Http_daemon.start' + ~timeout:(Some 600) ~port:Http_getter_env.port ~mode:`Thread callback + with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) + +let _ = main () + diff --git a/helm/http_getter/threadSafe.ml b/helm/http_getter/threadSafe.ml index 9902795ff..164b5b477 100644 --- a/helm/http_getter/threadSafe.ml +++ b/helm/http_getter/threadSafe.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * diff --git a/helm/http_getter/threadSafe.mli b/helm/http_getter/threadSafe.mli index 92a08630e..0023c89e6 100644 --- a/helm/http_getter/threadSafe.mli +++ b/helm/http_getter/threadSafe.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2003: + * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * diff --git a/helm/http_getter/zack.ml b/helm/http_getter/zack.ml deleted file mode 100644 index bc40f0c05..000000000 --- a/helm/http_getter/zack.ml +++ /dev/null @@ -1,475 +0,0 @@ -(* - * Zack's own OCaml library -- set of "helpers" function for the OCaml language - * - * Copyright (C) 2003: - * Stefano Zacchiroli - * - * This module is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This module is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this module; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - *) - -open Printf ;; - -exception Not_implemented ;; - -let (newline, newline_len) = - let default_newline = "\n" in - let newline = - match Sys.os_type with - | "Unix" -> "\n" - | "Win32" | "Cygwin" -> "\r\n" - | "MacOS" -> "\r" - | _ -> default_newline - in - (newline, String.length newline) -;; - -module ZLogic = - struct - - let non pred x = not (pred x) ;; - let conj p1 p2 x = (p1 x) && (p2 x) ;; - let disj p1 p2 x = (p1 x) || (p2 x) ;; - let imply p1 p2 x = (non p1) x || p2 x ;; - - let (&&&) = conj ;; - let (|||) = disj ;; - let (=>) = imply ;; - - end -;; - -module ZArray = - struct - - exception Found of int;; - - (** return the index of first element in ary on which pred is true *) - let index pred ary = - try - Array.iteri (fun idx e -> if pred e then raise (Found idx)) ary; - raise Not_found - with Found idx -> idx - ;; - (** as index but return the element itself instead of the index *) - let find pred ary = ary.(index pred ary) ;; - (** check if at least one element in ary satisfies pred *) - let exists pred ary = - try - ignore (find pred ary); - true - with Not_found -> false - ;; - (** check if all elements in ary satisfy pred *) - let for_all pred ary = not (exists (ZLogic.non pred) ary) ;; - - (** return a fresh array containing all elements of ary that satisfy pred - *) - let filter pred ary = - let indexes = (* indexes of element on which pred is satisfied *) - let (_, indexes) = - Array.fold_left - (fun (i, acc) e -> if pred e then (i+1, i::acc) else (i+1, acc)) - (0, []) - ary - in - List.rev indexes - in - let size = List.length indexes in - let newary = Array.make size ary.(0) in - let rec fill i = function - | [] -> () - | idx::tl -> - newary.(i) <- ary.(idx); - fill (i+1) tl - in - fill 0 indexes; - newary - ;; - - let lrotate () = - raise Not_implemented; () ;; - let rrotate () = - raise Not_implemented; () ;; - - end - -module ZDbm = - struct - (** fold on dbm key and values, processing order is not specified *) - let fold f init dbm = - let res = ref init in - Dbm.iter (fun key value -> res := f !res key value) dbm; - !res - ;; - end - -module ZHashtbl = - struct - let keys tbl = Hashtbl.fold (fun key _ acc -> key :: acc) tbl [] ;; - let values tbl = Hashtbl.fold (fun _ valu acc -> valu :: acc) tbl [] ;; - let remove_all tbl key = - for i = 1 to List.length (Hashtbl.find_all tbl key) do - Hashtbl.remove tbl key - done - ;; - end - -module ZList = - struct - (** tail recursive version of List.map *) - let map' f l = - let rec aux acc = function - | [] -> List.rev acc - | hd :: tl -> aux (f hd :: acc) tl - in - aux [] l - ;; - (** guarded map on lists. List.length output <= List.length input. - Not tail recursive *) - let rec map_if f pred = function - | [] -> [] - | hd::tl when pred hd -> f hd :: map_if f pred tl - | hd::tl -> map_if f pred tl - ;; - (** tail recursive version of map_if *) - let map_if' f pred l = - let rec aux acc = function - | [] -> List.rev acc - | hd::tl when pred hd -> aux (f hd :: acc) tl - | hd::tl -> aux acc tl - in - aux [] l - ;; - (** low level to implement assoc_all and assq_all *) - let assoc_all_gen eq key list = - let rec aux acc = function - | [] -> acc - | (k, v)::tl when (eq k key) -> aux (v :: acc) tl - | _::tl -> aux acc tl - in - List.rev (aux [] list) - ;; - (** return all binding of k in association list l in the order they appear - in l. Uses structural equality *) - let assoc_all k l = assoc_all_gen (=) k l ;; - (** as assoc_all but uses physical equality *) - let assq_all k l = assoc_all_gen (==) k l ;; - let lrotate = function - | [] -> raise (Invalid_argument "Zack.List.lrotate") - | hd::tl -> tl @ [hd] - ;; - let rrotate l = - match List.rev l with - | [] -> raise (Invalid_argument "Zack.List.rrotate") - | hd::tl -> hd :: List.rev tl - ;; - end - -module ZSys = - struct - let copy () = - raise Not_implemented; () ;; - end - -module ZUnix = - struct - - let mkdir () = - raise Not_implemented; () ;; - - let get_stats follow_symlink = - if follow_symlink then Unix.stat else Unix.lstat - ;; - (* low level for is_* predicates *) - let is_file_kind follow_symlink kind fname = - (get_stats follow_symlink fname).Unix.st_kind = kind - ;; - let is_regular ?(follow_symlink = true) = - is_file_kind follow_symlink Unix.S_REG ;; - let is_directory ?(follow_symlink = true) = - is_file_kind follow_symlink Unix.S_DIR ;; - let is_chardev ?(follow_symlink = true) = - is_file_kind follow_symlink Unix.S_CHR ;; - let is_blockdev ?(follow_symlink = true) = - is_file_kind follow_symlink Unix.S_BLK ;; - let is_symlink ?(follow_symlink = false) = - is_file_kind follow_symlink Unix.S_LNK ;; - let is_fifo ?(follow_symlink = true) = - is_file_kind follow_symlink Unix.S_FIFO ;; - let is_socket ?(follow_symlink = true) = - is_file_kind follow_symlink Unix.S_SOCK ;; - - let size ?(follow_symlink = true) fname = - (get_stats follow_symlink fname).Unix.st_size ;; - - (** return a list of all entries contained in a directory. Return order is - not specified *) - let ls dirname = - let dir = Unix.opendir dirname in - let rec aux acc = - match (try Some (Unix.readdir dir) with End_of_file -> None) with - | Some entry -> aux (entry :: acc) - | None -> acc - in - let res = aux [] in - Unix.closedir dir; - res - ;; - - end - -module ZString = - struct - - (** string -> char list *) - let explode s = - let chars = ref [] in - for i = String.length s - 1 downto 0 do - chars := s.[i] :: !chars - done; - !chars - ;; - - (** char list -> string *) - let implode l = - let buf = Buffer.create (List.length l) in - let rec implode' = function - | [] -> Buffer.contents buf - | hd::tl -> - Buffer.add_char buf hd; - implode' tl - in - implode' l - ;; - - (** perl's chomp, remove once trailing "\n", if any *) - let chomp s = - let len = String.length s in - let diff = len - newline_len in - if String.sub s diff newline_len = newline then (* trailing newline *) - String.sub s 0 diff - else - s - ;; - - (** map on string *) - let map f s = - for i = 0 to String.length s do - s.[i] <- f s.[i] - done - ;; - - (** fold_left on string *) - let fold_left f init s = - let len = String.length s in - let rec fold_left' idx acc = - if idx = len then - acc - else (* idx < len *) - fold_left' (idx + 1) (f acc s.[idx]) - in - fold_left' 0 init - ;; - - (* TODO Non funge *) - let fold_right f s init = - let len = String.length s in - let rec fold_right' idx acc = - if idx < 0 then - acc - else (* idx >= 0 *) - fold_right' (idx - 1) (f s.[idx] acc) - in - fold_right' len (init - 1) - ;; - - (** iter on string *) - let iter (f: char -> unit) = fold_left (fun _ c -> f c) () ;; - (* - let string_iter (f: char -> unit) s = - for i = 0 to String.length s do - f s.[i] - done - ;; - *) - - let filter () = - raise Not_implemented; () ;; - - (** create a string of length len and sets each char of them to the result - of applying f to the char's index *) - let init len f = - let str = String.create len in - for i = 0 to len - 1 do - str.[i] <- f i - done; - str - ;; - - end - -module ZRandom = - struct - - type ranges = (int * int) list - - let digit_range = [48, 57] ;; - let alpha_upper_range = [65, 90] ;; - let alpha_lower_range = [97, 122] ;; - let alpha_range = alpha_upper_range @ alpha_lower_range ;; - let alphanum_range = digit_range @ alpha_range ;; - let word_range = alphanum_range @ [95, 95] ;; (* alphanum + '_' *) - - let rec ranges_are_sane = function - | (min, max) :: tl -> - if min > max || min < 0 || max > 255 then - failwith (sprintf "ZRandom: invalid range %d .. %d" min max); - ranges_are_sane tl - | [] -> () - ;; - let size_of_ranges = (* assumption: ranges are sane *) - let rec aux acc = function - | [] -> acc - | ((min, max) as range) :: tl -> aux (acc + (max - min + 1)) tl - in - aux 0 - ;; - let nth_in_ranges idx ranges = (* assumption: ranges are sane *) - if ranges = [] then - failwith "ZRandom: no range provided"; - let rec aux idx = function - | [] -> assert false - | (min, max) :: tl -> - let nth = min + idx in - if nth <= max then nth else aux (nth - max - 1) tl - in - aux idx ranges - ;; - - (* low level for char and string *) - let char' ranges = - let int = Random.int (size_of_ranges ranges) in - Char.chr (nth_in_ranges int ranges) - ;; - - (** generate a random char inside provided ranges. Ranges are provided as - a list of int pairs. Each pair represent an inclusive interval of possible - character codes. Default range is [0, 255] *) - let char ?(ranges = [0,255]) () = - ranges_are_sane ranges; - char' ranges - ;; - - (** generate a string of random characters inside provided range *) - let string ?(ranges = [0,255]) len = - ranges_are_sane ranges; - ZString.init len (fun _ -> char' ranges) - ;; - - end - -module ZStream = - struct - - (** map on streams. Beware that this function build stream using - Stream.from. That kind of stream can't be mixed with ordinary streams *) - let map f stream = - Stream.from - (fun _ -> try Some (f (Stream.next stream)) with Stream.Failure -> None) - ;; - (** fold on streams. Beware that this function build stream using - Stream.from. That kind of stream can't be mixed with ordinary streams *) - let rec fold f init stream = - match (try Some (Stream.next stream) with Stream.Failure -> None) with - | Some item -> fold f (f init item) stream - | None -> init - ;; - - (** given an input channel return the stream of its lines (without - trailing new line) *) - let of_inchan ic = - Stream.from (fun _ -> try Some (input_line ic) with End_of_file -> None) - ;; - - end - - (** fold_left on input channel lines *) -let rec fold_in f init ic = - match (try Some (input_line ic) with End_of_file -> None) with - | Some l -> fold_in f (f init l) ic - | None -> init -;; - - (** iter on input channel lines *) -let iter_in f = fold_in (fun _ line -> f line) () ;; - - (** map on input channel lines *) -let map_in f ic = List.rev (fold_in (fun acc line -> f line :: acc) [] ic) ;; - - (** return list of lines read from an input channel *) -let input_lines ic = List.rev (fold_in (fun acc line -> line :: acc) [] ic) ;; - - (** read all data available on an input channel and return them as a string *) -let input_all = - let strlen = 8192 in - let buflen = 8192 * 2 in - let str = String.create strlen in - fun ic -> - let buf = Buffer.create buflen in - let rec input' () = - let bytes = input ic str 0 strlen in - if bytes = 0 then (* EOF *) - Buffer.contents buf - else begin - Buffer.add_substring buf str 0 bytes; - input' () - end - in - input' () -;; - - (** write a list of lines to an output channel. Newline is added at the end of - each line *) -let rec output_lines lines oc = - match lines with - | [] -> () - | hd::tl -> - output_string oc (hd ^ newline); - output_lines tl oc -;; - - (** read_lines on stdin *) -let read_lines () = input_lines stdin ;; - (** read_all on stdin *) -let read_all () = input_all stdin ;; - - (** Some constructor inverse *) -let unsome = function - | Some x -> x - | None -> raise (Invalid_argument "Zack.unsome") -;; - -module Array = ZArray ;; -module Dbm = ZDbm ;; -module Hashtbl = ZHashtbl ;; -module List = ZList ;; -module Logic = ZLogic ;; -module Random = ZRandom ;; -module Stream = ZStream ;; -module String = ZString ;; -module Sys = ZSys ;; -module Unix = ZUnix ;; - diff --git a/helm/http_getter/zack.mli b/helm/http_getter/zack.mli deleted file mode 100644 index 581bcfd05..000000000 --- a/helm/http_getter/zack.mli +++ /dev/null @@ -1,149 +0,0 @@ -(* - * Zack's own OCaml library -- set of "helpers" function for the OCaml language - * - * Copyright (C) 2003: - * Stefano Zacchiroli - * - * This module is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This module is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this module; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - *) - -exception Not_implemented - -val fold_in : ('a -> string -> 'a) -> 'a -> in_channel -> 'a -val iter_in : (string -> unit) -> in_channel -> unit -val map_in : (string -> 'a) -> in_channel -> 'a list -val input_lines : in_channel -> string list -val input_all : in_channel -> string -val output_lines : string list -> out_channel -> unit -val read_lines : unit -> string list -val read_all : unit -> string -val unsome : 'a option -> 'a - -module Array : - sig - val index : ('a -> bool) -> 'a array -> int - val find : ('a -> bool) -> 'a array -> 'a - - val exists : ('a -> bool) -> 'a array -> bool - val for_all : ('a -> bool) -> 'a array -> bool - - val filter : ('a -> bool) -> 'a array -> 'a array - -(* val lrotate : ?step:int -> 'a array -> 'a array *) -(* val rrotate : ?step:int -> 'a array -> 'a array *) - end - -module Dbm : - sig - val fold : ('a -> string -> string -> 'a) -> 'a -> Dbm.t -> 'a - end - -module Hashtbl : - sig - val keys : ('a, 'b) Hashtbl.t -> 'a list - val values : ('a, 'b) Hashtbl.t -> 'b list - - val remove_all : ('a, 'b) Hashtbl.t -> 'a -> unit - end - -module List : - sig - val map' : ('a -> 'b) -> 'a list -> 'b list - val map_if : ('a -> 'b) -> ('a -> bool) -> 'a list -> 'b list - val map_if' : ('a -> 'b) -> ('a -> bool) -> 'a list -> 'b list - - val assoc_all : 'a -> ('a * 'b) list -> 'b list - val assq_all : 'a -> ('a * 'b) list -> 'b list - - val lrotate : 'a list -> 'a list - val rrotate : 'a list -> 'a list -(* val List.lrotate: ?step:int -> 'a list -> 'a list *) -(* val List.rrotate: ?step:int -> 'a list -> 'a list *) - end - -module Logic : - sig - val non : ('a -> bool) -> 'a -> bool - val conj : ('a -> bool) -> ('a -> bool) -> 'a -> bool - val disj : ('a -> bool) -> ('a -> bool) -> 'a -> bool - val imply : ('a -> bool) -> ('a -> bool) -> 'a -> bool - - val ( &&& ) : ('a -> bool) -> ('a -> bool) -> 'a -> bool - val ( ||| ) : ('a -> bool) -> ('a -> bool) -> 'a -> bool - val ( => ) : ('a -> bool) -> ('a -> bool) -> 'a -> bool - end - -module Random : - sig - val digit_range : (int * int) list - val alpha_upper_range : (int * int) list - val alpha_lower_range : (int * int) list - val alpha_range : (int * int) list - val alphanum_range : (int * int) list - val word_range : (int * int) list - - val char : ?ranges:(int * int) list -> unit -> char - val string : ?ranges:(int * int) list -> int -> string - end - -module Stream : - sig - val map : ('a -> 'b) -> 'a Stream.t -> 'b Stream.t - val fold : ('a -> 'b -> 'a) -> 'a -> 'b Stream.t -> 'a - - val of_inchan : in_channel -> string Stream.t - end - -module String : - sig - val explode : string -> char list - val implode : char list -> string - - val chomp : string -> string - - val map : (char -> char) -> string -> unit - val fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a -(* val fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a *) - val iter : (char -> unit) -> string -> unit -(* val filter : (char -> bool) -> string -> string *) - - val init : int -> (int -> char) -> string - end - -(* -module Sys : - sig - val copy : src:string -> dst:string -> unit - end -*) - -module Unix : - sig -(* val mkdir : ?parents:bool -> string -> unit *) - - val is_regular : ?follow_symlink:bool -> string -> bool - val is_directory : ?follow_symlink:bool -> string -> bool - val is_chardev : ?follow_symlink:bool -> string -> bool - val is_blockdev : ?follow_symlink:bool -> string -> bool - val is_symlink : ?follow_symlink:bool -> string -> bool - val is_fifo : ?follow_symlink:bool -> string -> bool - val is_socket : ?follow_symlink:bool -> string -> bool - - val size : ?follow_symlink:bool -> string -> int - - val ls : string -> string list - end - -- 2.39.2