X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Fgetter%2Fhttp_getter_cache.ml;h=144b9ac5f4c5ad99f9ebad8fb528a4977878a3f0;hb=3fd8583f198e7e09995c3a65b5f05a853c8d1646;hp=1dd18fa1e3f46b78c46626a91ab5f45ae3d62aa4;hpb=3af56c5a48f7cad33fd701e0061fe143e0e2a7c5;p=helm.git diff --git a/helm/ocaml/getter/http_getter_cache.ml b/helm/ocaml/getter/http_getter_cache.ml index 1dd18fa1e..144b9ac5f 100644 --- a/helm/ocaml/getter/http_getter_cache.ml +++ b/helm/ocaml/getter/http_getter_cache.ml @@ -32,11 +32,10 @@ clients, uwobo (java implementation, not yet tested with the OCaml one) starts looping sending output to one of the client *) -open Http_getter_common;; -open Http_getter_debugger;; -open Http_getter_misc;; -open Http_getter_types;; -open Printf;; +open Http_getter_common +open Http_getter_misc +open Http_getter_types +open Printf (* expose ThreadSafe.threadSafe methods *) class threadSafe = @@ -59,22 +58,24 @@ let finally cleanup f = raise (Http_getter_types.Cache_failure (Printexc.to_string e)) let resource_type_of_url = function - | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped - | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal + | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped + | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal | url -> raise (Invalid_URL url) let extension_of_resource_type = function - | Enc_normal -> "xml" - | Enc_gzipped -> "xml.gz" + | `Normal -> "xml" + | `Gzipped -> "xml.gz" (* basename = resource name without trailing ".gz", if any *) let is_in_cache basename = Sys.file_exists (match Lazy.force Http_getter_env.cache_mode with - | Enc_normal -> basename - | Enc_gzipped -> basename ^ ".gz") + | `Normal -> basename + | `Gzipped -> basename ^ ".gz") -let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = +let respond_xml + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan + = let resource_type = resource_type_of_url url in let extension = extension_of_resource_type resource_type in let downloadname = @@ -100,7 +101,16 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension in let patch_fun = - if patch then Http_getter_common.patch_xml else (fun x -> x) + let xmlbases = + if Http_getter_common.is_theory_uri uri then + Some (Filename.dirname uri, Filename.dirname url) + else + None + in + if patch then + Http_getter_common.patch_xml ?xmlbases ~via_http () + else + (fun x -> x) in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in let contype = "text/xml" in @@ -113,16 +123,16 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = let fill_cache () = threadSafe#doWriter (lazy( if not (is_in_cache basename) then begin (* cache MISS *) - debug_print "Cache MISS :-("; + Http_getter_logger.log ~level:2 "Cache MISS :-("; mkdir ~parents:true (Filename.dirname downloadname); match (resource_type, Lazy.force Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + | `Normal, `Normal | `Gzipped, `Gzipped -> wget ~output:downloadname url; None - | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) + | `Normal, `Gzipped -> (* resource normal, cache gzipped *) let tmp = tempfile () in let (res, cleanup) = - if enc = Enc_normal then (* user wants normal: don't delete it! *) + if enc = `Normal then (* user wants normal: don't delete it! *) (Some (tmp, enc), (fun () -> ())) else (None, (fun () -> Sys.remove tmp)) @@ -132,10 +142,10 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) res )); - | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) + | `Gzipped, `Normal -> (* resource gzipped, cache normal *) let tmp = tempfile () in let (res, cleanup) = - if enc = Enc_gzipped then (* user wants .gz: don't delete it! *) + if enc = `Gzipped then (* user wants .gz: don't delete it! *) (Some (tmp, enc), (fun () -> ())) else (None, (fun () -> Sys.remove tmp)) @@ -146,7 +156,7 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = res )); end else begin - debug_print "Cache HIT :-)"; + Http_getter_logger.log ~level:2 "Cache HIT :-)"; None end )) in @@ -154,70 +164,79 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = threadSafe#doReader (lazy( assert (is_in_cache basename); match (enc, Lazy.force Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + | `Normal, `Normal | `Gzipped, `Gzipped -> (* resource in cache is already in the required format *) (match enc with - | Enc_normal -> - debug_print "No format mangling required (encoding = normal)"; - return_file ~fname:basename ~contype ~patch_fun outchan - | Enc_gzipped -> - debug_print "No format mangling required (encoding = gzipped)"; + | `Normal -> + Http_getter_logger.log ~level:2 + "No format mangling required (encoding = normal)"; + return_file ~via_http ~fname:basename ~contype ~patch_fun outchan + | `Gzipped -> + Http_getter_logger.log ~level:2 + "No format mangling required (encoding = gzipped)"; return_file - ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" + ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan) - | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> + | `Normal, `Gzipped | `Gzipped, `Normal -> (match tmp_short_circuit with | None -> (* no short circuit possible, use cache *) - debug_print "No short circuit available, use cache"; + Http_getter_logger.log ~level:2 + "No short circuit available, use cache"; let tmp = tempfile () in finally (fun () -> Sys.remove tmp) (lazy ( (match enc with - | Enc_normal -> + | `Normal -> (* required format is normal, cached version is gzipped *) gunzip (* gunzip to tmp *) ~output:tmp ~keep:true (basename ^ ".gz"); - return_file ~fname:tmp ~contype ~patch_fun outchan; - | Enc_gzipped -> + return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan; + | `Gzipped -> (* required format is gzipped, cached version is normal *) gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) return_file - ~fname:tmp ~contype ~contenc:"x-gzip" + ~via_http ~fname:tmp ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan) )) - | Some (fname, Enc_normal) -> (* short circuit available, use it! *) - debug_print "Using short circuit (encoding = normal)"; + | Some (fname, `Normal) -> (* short circuit available, use it! *) + Http_getter_logger.log ~level:2 + "Using short circuit (encoding = normal)"; finally (fun () -> Sys.remove fname) (lazy ( - return_file ~fname ~contype ~patch_fun outchan + return_file ~via_http ~fname ~contype ~patch_fun outchan )) - | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) - debug_print "Using short circuit (encoding = gzipped)"; + | Some (fname, `Gzipped) -> (* short circuit available, use it! *) + Http_getter_logger.log ~level:2 + "Using short circuit (encoding = gzipped)"; finally (fun () -> Sys.remove fname) (lazy ( - return_file ~fname ~contype ~contenc:"x-gzip" ~patch_fun + return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan ))) )) (* TODO enc is not yet supported *) -let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = +let respond_xsl + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = let patch_fun = - if patch then Http_getter_common.patch_xsl else (fun x -> x) + if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) in let fname = tempfile () in finally (fun () -> Sys.remove fname) (lazy ( wget ~output:fname url; - return_file ~fname ~contype:"text/xml" ~patch_fun outchan + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan )) (* TODO enc is not yet supported *) -let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan = +let respond_dtd + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = let patch_fun = - if patch then Http_getter_common.patch_dtd else (fun x -> x) + if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x) in if Sys.file_exists url then (* TODO check this: old getter here used text/xml *) - return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan + return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan else raise (Dtd_not_found url)