From f44ab01307f10d4165c76e3108542a5bc2035766 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 6 Apr 2004 09:53:26 +0000 Subject: [PATCH] - added via_http parameter so that when the getter is used locally (i.e. as a library), http specific messages aren't generated - use polymorphic variants for some configuration parameters - better logging on /update (
tags are now generated) --- helm/ocaml/getter/http_getter.ml | 49 ++++++---- helm/ocaml/getter/http_getter_cache.ml | 72 ++++++++------- helm/ocaml/getter/http_getter_cache.mli | 18 ++-- helm/ocaml/getter/http_getter_common.ml | 108 +++++++++++++++-------- helm/ocaml/getter/http_getter_common.mli | 12 ++- 5 files changed, 160 insertions(+), 99 deletions(-) diff --git a/helm/ocaml/getter/http_getter.ml b/helm/ocaml/getter/http_getter.ml index 97e8c2f43..a817f7448 100644 --- a/helm/ocaml/getter/http_getter.ml +++ b/helm/ocaml/getter/http_getter.ml @@ -98,6 +98,7 @@ let update_from_server logger server_url = (* use global maps *) | uri -> raise (Invalid_URI uri) in logger (`T ("Processing server: " ^ server_url)); + logger `BR; let (xml_index, rdf_index, xsl_index) = (* TODO keeps index in memory, is better to keep them on temp files? *) (http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xml_index)), @@ -108,7 +109,8 @@ let update_from_server logger server_url = (* use global maps *) debug_print (sprintf "Warning: useless server %s" server_url); (match xml_index with | Some xml_index -> - logger (`T "Updating XML db ...
"); + logger (`T "Updating XML db ..."); + logger `BR; List.iter (function | l when is_blank_line l -> () (* skip blank and commented lines *) @@ -123,15 +125,20 @@ let update_from_server logger server_url = (* use global maps *) assert (is_cic_uri uri || is_nuprl_uri uri) ; (map_of_uri uri)#replace uri ((xml_url_of_uri uri) ^ ".xml") - | _ -> logger (`T ("Ignoring invalid line: '" ^ l))) + | _ -> + logger (`T ("Ignoring invalid line: '" ^ l)); + logger `BR) with Invalid_URI uri -> - logger (`T ("Ignoring invalid XML URI: '" ^ l)))) + logger (`T ("Ignoring invalid XML URI: '" ^ l)); + logger `BR)) (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *) - logger (`T "All done") + logger (`T "All done"); + logger `BR | None -> ()); (match rdf_index with | Some rdf_index -> logger (`T "Updating RDF db ..."); + logger `BR; List.iter (fun l -> try @@ -142,19 +149,25 @@ let update_from_server logger server_url = (* use global maps *) | [uri] -> (Lazy.force rdf_map) # replace uri ((rdf_url_of_uri uri) ^ ".xml") - | _ -> logger (`T ("Ignoring invalid line: '" ^ l))) + | _ -> + logger (`T ("Ignoring invalid line: '" ^ l)); + logger `BR) with Invalid_URI uri -> - logger (`T ("Ignoring invalid RDF URI: '" ^ l))) + logger (`T ("Ignoring invalid RDF URI: '" ^ l)); + logger `BR) (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *) - logger (`T "All done") + logger (`T "All done"); + logger `BR | None -> ()); (match xsl_index with | Some xsl_index -> logger (`T "Updating XSLT db ..."); + logger `BR; List.iter (fun l -> (Lazy.force xsl_map) # replace l (server_url ^ "/" ^ l)) (Pcre.split ~rex:index_sep_RE xsl_index); - logger (`T "All done") + logger (`T "All done"); + logger `BR | None -> ()); debug_print "done with this server" @@ -230,11 +243,11 @@ let update_remote logger () = logger (`T answer) let getxml_remote ~format ~patch_dtd uri = - ClientHTTP.get_and_save_to_tmp - (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" - (getter_url ()) uri - (match format with Enc_normal -> "normal" | Enc_gzipped -> "gzipped") - (match patch_dtd with true -> "yes" | false -> "no")) + ClientHTTP.get_and_save_to_tmp + (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" + (getter_url ()) uri + (match format with `Normal -> "normal" | `Gzipped -> "gzipped") + (match patch_dtd with true -> "yes" | false -> "no")) (* API *) @@ -261,14 +274,14 @@ let update ?(logger = fun _ -> ()) () = else update_from_all_servers logger () -let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri = +let getxml ?(format = `Normal) ?(patch_dtd = true) uri = if remote () then getxml_remote ~format ~patch_dtd uri else begin let url = resolve uri in let (fname, outchan) = temp_file_of_uri uri in - Http_getter_cache.respond_xml ~uri ~url ~enc:format ~patch:patch_dtd - outchan; + Http_getter_cache.respond_xml ~via_http:false ~enc:format ~patch:patch_dtd + ~uri ~url outchan; close_out outchan; fname end @@ -279,7 +292,7 @@ let getxslt ?(patch_dtd = true) uri = else begin let url = resolve uri in let (fname, outchan) = temp_file_of_uri uri in - Http_getter_cache.respond_xsl ~url ~patch:patch_dtd outchan; + Http_getter_cache.respond_xsl ~via_http:false ~url ~patch:patch_dtd outchan; close_out outchan; fname end @@ -290,7 +303,7 @@ let getdtd ?(patch_dtd = true) uri = else begin let url = Lazy.force Http_getter_env.dtd_dir ^ "/" ^ uri in let (fname, outchan) = temp_file_of_uri uri in - Http_getter_cache.respond_dtd ~url ~patch:patch_dtd outchan; + Http_getter_cache.respond_dtd ~via_http:false ~url ~patch:patch_dtd outchan; close_out outchan; fname end diff --git a/helm/ocaml/getter/http_getter_cache.ml b/helm/ocaml/getter/http_getter_cache.ml index 1dd18fa1e..75730ac21 100644 --- a/helm/ocaml/getter/http_getter_cache.ml +++ b/helm/ocaml/getter/http_getter_cache.ml @@ -59,22 +59,24 @@ let finally cleanup f = raise (Http_getter_types.Cache_failure (Printexc.to_string e)) let resource_type_of_url = function - | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped - | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal + | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped + | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal | url -> raise (Invalid_URL url) let extension_of_resource_type = function - | Enc_normal -> "xml" - | Enc_gzipped -> "xml.gz" + | `Normal -> "xml" + | `Gzipped -> "xml.gz" (* basename = resource name without trailing ".gz", if any *) let is_in_cache basename = Sys.file_exists (match Lazy.force Http_getter_env.cache_mode with - | Enc_normal -> basename - | Enc_gzipped -> basename ^ ".gz") + | `Normal -> basename + | `Gzipped -> basename ^ ".gz") -let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = +let respond_xml + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan + = let resource_type = resource_type_of_url url in let extension = extension_of_resource_type resource_type in let downloadname = @@ -100,7 +102,7 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension in let patch_fun = - if patch then Http_getter_common.patch_xml else (fun x -> x) + if patch then Http_getter_common.patch_xml ~via_http () else (fun x -> x) in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in let contype = "text/xml" in @@ -116,13 +118,13 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = debug_print "Cache MISS :-("; mkdir ~parents:true (Filename.dirname downloadname); match (resource_type, Lazy.force Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + | `Normal, `Normal | `Gzipped, `Gzipped -> wget ~output:downloadname url; None - | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) + | `Normal, `Gzipped -> (* resource normal, cache gzipped *) let tmp = tempfile () in let (res, cleanup) = - if enc = Enc_normal then (* user wants normal: don't delete it! *) + if enc = `Normal then (* user wants normal: don't delete it! *) (Some (tmp, enc), (fun () -> ())) else (None, (fun () -> Sys.remove tmp)) @@ -132,10 +134,10 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) res )); - | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) + | `Gzipped, `Normal -> (* resource gzipped, cache normal *) let tmp = tempfile () in let (res, cleanup) = - if enc = Enc_gzipped then (* user wants .gz: don't delete it! *) + if enc = `Gzipped then (* user wants .gz: don't delete it! *) (Some (tmp, enc), (fun () -> ())) else (None, (fun () -> Sys.remove tmp)) @@ -154,70 +156,74 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = threadSafe#doReader (lazy( assert (is_in_cache basename); match (enc, Lazy.force Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + | `Normal, `Normal | `Gzipped, `Gzipped -> (* resource in cache is already in the required format *) (match enc with - | Enc_normal -> + | `Normal -> debug_print "No format mangling required (encoding = normal)"; - return_file ~fname:basename ~contype ~patch_fun outchan - | Enc_gzipped -> + return_file ~via_http ~fname:basename ~contype ~patch_fun outchan + | `Gzipped -> debug_print "No format mangling required (encoding = gzipped)"; return_file - ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" + ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan) - | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> + | `Normal, `Gzipped | `Gzipped, `Normal -> (match tmp_short_circuit with | None -> (* no short circuit possible, use cache *) debug_print "No short circuit available, use cache"; let tmp = tempfile () in finally (fun () -> Sys.remove tmp) (lazy ( (match enc with - | Enc_normal -> + | `Normal -> (* required format is normal, cached version is gzipped *) gunzip (* gunzip to tmp *) ~output:tmp ~keep:true (basename ^ ".gz"); - return_file ~fname:tmp ~contype ~patch_fun outchan; - | Enc_gzipped -> + return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan; + | `Gzipped -> (* required format is gzipped, cached version is normal *) gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) return_file - ~fname:tmp ~contype ~contenc:"x-gzip" + ~via_http ~fname:tmp ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan) )) - | Some (fname, Enc_normal) -> (* short circuit available, use it! *) + | Some (fname, `Normal) -> (* short circuit available, use it! *) debug_print "Using short circuit (encoding = normal)"; finally (fun () -> Sys.remove fname) (lazy ( - return_file ~fname ~contype ~patch_fun outchan + return_file ~via_http ~fname ~contype ~patch_fun outchan )) - | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) + | Some (fname, `Gzipped) -> (* short circuit available, use it! *) debug_print "Using short circuit (encoding = gzipped)"; finally (fun () -> Sys.remove fname) (lazy ( - return_file ~fname ~contype ~contenc:"x-gzip" ~patch_fun + return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan ))) )) (* TODO enc is not yet supported *) -let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = +let respond_xsl + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = let patch_fun = - if patch then Http_getter_common.patch_xsl else (fun x -> x) + if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) in let fname = tempfile () in finally (fun () -> Sys.remove fname) (lazy ( wget ~output:fname url; - return_file ~fname ~contype:"text/xml" ~patch_fun outchan + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan )) (* TODO enc is not yet supported *) -let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan = +let respond_dtd + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = let patch_fun = - if patch then Http_getter_common.patch_dtd else (fun x -> x) + if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x) in if Sys.file_exists url then (* TODO check this: old getter here used text/xml *) - return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan + return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan else raise (Dtd_not_found url) diff --git a/helm/ocaml/getter/http_getter_cache.mli b/helm/ocaml/getter/http_getter_cache.mli index a026e72c0..7789a023b 100644 --- a/helm/ocaml/getter/http_getter_cache.mli +++ b/helm/ocaml/getter/http_getter_cache.mli @@ -28,19 +28,23 @@ open Http_getter_types;; -(** all these methods could raise Http_getter_types.Cache_failure *) +(** all these methods could raise Http_getter_types.Cache_failure. + * @param via_http (default: true) @see Http_getter_common.return_file *) val respond_xml: - ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> out_channel -> - unit + ?via_http:bool -> ?enc:encoding -> ?patch:bool -> url:string -> uri:string -> + out_channel -> + unit val respond_xsl: - ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> - unit + ?via_http:bool -> ?enc:encoding -> ?patch:bool -> url:string -> + out_channel -> + unit val respond_dtd: - ?enc:encoding -> ?patch:bool -> url:string -> out_channel -> - unit + ?via_http:bool -> ?enc:encoding -> ?patch:bool -> url:string -> + out_channel -> + unit val clean: unit -> unit diff --git a/helm/ocaml/getter/http_getter_common.ml b/helm/ocaml/getter/http_getter_common.ml index 62a52c792..bbed79722 100644 --- a/helm/ocaml/getter/http_getter_common.ml +++ b/helm/ocaml/getter/http_getter_common.ml @@ -31,8 +31,8 @@ open Printf;; let string_of_ls_flag = function No -> "NO" | Yes -> "YES" | Ann -> "ANN" let string_of_encoding = function - | Enc_normal -> "Normal" - | Enc_gzipped -> "GZipped" + | `Normal -> "Normal" + | `Gzipped -> "GZipped" let is_cic_obj_uri uri = Pcre.pmatch ~pat:"^cic:" uri let is_theory_uri uri = Pcre.pmatch ~pat:"^theory:" uri @@ -58,35 +58,64 @@ let rec uri_of_string = function Cic_uri (Theory (Pcre.replace ~pat:"^theory:" uri)) | uri -> raise (Invalid_URI uri) -let patch_xml line = - Pcre.replace - ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" - (Lazy.force Http_getter_env.dtd_base_url)) - ~templ:(sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri=" - (Lazy.force Http_getter_env.my_own_url)) - line -let patch_xsl line = - let mk_patch_fun tag line = - Pcre.replace - ~pat:(sprintf "%s\\s+href=\"" tag) - ~templ:(sprintf "%s href=\"%s/getxslt?uri=" - tag (Lazy.force Http_getter_env.my_own_url)) - line - in - let (patch_import, patch_include) = - (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include") - in - patch_include (patch_import line) -let patch_dtd line = - Pcre.replace - ~pat:(sprintf "ENTITY (.*) SYSTEM\\s+\"(%s/)?" - (Lazy.force Http_getter_env.dtd_base_url)) - ~templ:(sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri=" - (Lazy.force Http_getter_env.my_own_url)) - line +let patch_xsl ?(via_http = true) () = + fun line -> + let mk_patch_fun tag line = + Pcre.replace + ~pat:(sprintf "%s\\s+href=\"" tag) + ~templ:(sprintf "%s href=\"%s/getxslt?uri=" + tag (Lazy.force Http_getter_env.my_own_url)) + line + in + let (patch_import, patch_include) = + (mk_patch_fun "xsl:import", mk_patch_fun "xsl:include") + in + patch_include (patch_import line) + +let patch_entity ?(via_http = true) () = + if via_http then + fun line -> + Pcre.replace + ~pat:(sprintf "ENTITY (.*) SYSTEM\\s+\"(%s/)?" + (Lazy.force Http_getter_env.dtd_base_url)) + ~templ:(sprintf "ENTITY $1 SYSTEM \"%s/getdtd?uri=" + (Lazy.force Http_getter_env.my_own_url)) + line + else + fun line -> + Pcre.replace + ~pat:(sprintf "ENTITY (.*) SYSTEM\\s+\"(%s/)?" + (Lazy.force Http_getter_env.dtd_base_url)) + ~templ:(sprintf "ENTITY $1 SYSTEM \"file://%s/" + (Lazy.force Http_getter_env.dtd_dir)) + line + +let patch_doctype ?(via_http = true) () = + if via_http then + fun line -> + Pcre.replace + ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" + (Lazy.force Http_getter_env.dtd_base_url)) + ~templ:(sprintf "DOCTYPE $1 SYSTEM \"%s/getdtd?uri=" + (Lazy.force Http_getter_env.my_own_url)) + line + else + fun line -> + Pcre.replace + ~pat:(sprintf "DOCTYPE (.*) SYSTEM\\s+\"%s/" + (Lazy.force Http_getter_env.dtd_base_url)) + ~templ:(sprintf "DOCTYPE $1 SYSTEM \"file://%s/" + (Lazy.force Http_getter_env.dtd_dir)) + line + +let patch_dtd = patch_entity + +let patch_xml ?via_http () line = + patch_doctype ?via_http () (patch_entity ?via_http () line) let return_file - ~fname ?contype ?contenc ?(patch_fun = fun x -> x) ?(gunzip = false) outchan + ~fname ?contype ?contenc + ?(patch_fun = fun x -> x) ?(gunzip = false) ?(via_http = true) outchan = let headers = match (contype, contenc) with @@ -95,9 +124,11 @@ let return_file | (None, Some e) -> ["Content-Encoding", e] | (None, None) -> [] in - Http_daemon.send_basic_headers ~code:200 outchan; - Http_daemon.send_headers headers outchan; - Http_daemon.send_CRLF outchan; + if via_http then begin + Http_daemon.send_basic_headers ~code:200 outchan; + Http_daemon.send_headers headers outchan; + Http_daemon.send_CRLF outchan + end; if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to it, compress the result and sent it to client *) let (tmp1, tmp2) = @@ -107,20 +138,23 @@ let return_file Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname;(* gunzip to tmp1 *) let new_file = open_out tmp2 in Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *) - (fun line -> output_string new_file (patch_fun line ^ "\n")) + (fun line -> + output_string new_file (patch_fun line ^ "\n"); + flush outchan) tmp1; close_out new_file; Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *) Http_getter_misc.iter_file (* send tmp1 to client as is*) - (fun line -> output_string outchan (line ^ "\n")) + (fun line -> output_string outchan (line ^ "\n"); flush outchan) tmp1; Sys.remove tmp1 (* rm tmp1 *) with e -> Sys.remove tmp1; raise e - end else (* no need to gunzip, apply patch_fun directly to file *) + end else begin (* no need to gunzip, apply patch_fun directly to file *) Http_getter_misc.iter_file - (fun line -> output_string outchan (patch_fun line ^ "\n")) - fname + (fun line -> output_string outchan (patch_fun line ^ "\n"); flush outchan) + fname; + end ;; diff --git a/helm/ocaml/getter/http_getter_common.mli b/helm/ocaml/getter/http_getter_common.mli index 1828c82e8..b4f733d8f 100644 --- a/helm/ocaml/getter/http_getter_common.mli +++ b/helm/ocaml/getter/http_getter_common.mli @@ -38,9 +38,10 @@ val is_xsl_uri: string -> bool val uri_of_string: string -> uri -val patch_xml : string -> string -val patch_xsl : string -> string -val patch_dtd : string -> string +val patch_xml : ?via_http:bool -> unit -> string -> string +val patch_dtd : ?via_http:bool -> unit -> string -> string + (* TODO via_http not yet supported for patch_xsl *) +val patch_xsl : ?via_http:bool -> unit -> string -> string (** @param fname name of the file to be sent @@ -50,11 +51,14 @@ val patch_dtd : string -> string @param gunzip is meaningful only if a patch function is provided. If gunzip is true patch_fun is applied to the uncompressed version of the file. The file is then compressed again and send to client + @param via_http (default: true) if true http specific communications are used + (e.g. headers, crlf before body) and sent via outchan, otherwise they're not. + Set it to false when saving to a local file @param outchan output channel over which sent file fname *) val return_file: fname:string -> ?contype:string -> ?contenc:string -> - ?patch_fun:(string -> string) -> ?gunzip:bool -> + ?patch_fun:(string -> string) -> ?gunzip:bool -> ?via_http:bool -> out_channel -> unit -- 2.39.2