X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Fgetter%2Fhttp_getter_cache.ml;h=144b9ac5f4c5ad99f9ebad8fb528a4977878a3f0;hb=3fd8583f198e7e09995c3a65b5f05a853c8d1646;hp=a3f91220eea13957d40b377ed0dead0f9925e13f;hpb=384c369d4cbf5dd6cf1013902d3a218260400e73;p=helm.git diff --git a/helm/ocaml/getter/http_getter_cache.ml b/helm/ocaml/getter/http_getter_cache.ml index a3f91220e..144b9ac5f 100644 --- a/helm/ocaml/getter/http_getter_cache.ml +++ b/helm/ocaml/getter/http_getter_cache.ml @@ -32,11 +32,10 @@ clients, uwobo (java implementation, not yet tested with the OCaml one) starts looping sending output to one of the client *) -open Http_getter_common;; -open Http_getter_debugger;; -open Http_getter_misc;; -open Http_getter_types;; -open Printf;; +open Http_getter_common +open Http_getter_misc +open Http_getter_types +open Printf (* expose ThreadSafe.threadSafe methods *) class threadSafe = @@ -46,25 +45,37 @@ class threadSafe = method virtual doReader : 'a. 'a lazy_t -> 'a method virtual doWriter : 'a. 'a lazy_t -> 'a end -;; -let threadSafe = new threadSafe ;; + +let threadSafe = new threadSafe + +let finally cleanup f = + try + let res = Lazy.force f in + cleanup (); + res + with e -> + cleanup (); + raise (Http_getter_types.Cache_failure (Printexc.to_string e)) let resource_type_of_url = function - | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped - | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal + | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped + | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal | url -> raise (Invalid_URL url) + let extension_of_resource_type = function - | Enc_normal -> "xml" - | Enc_gzipped -> "xml.gz" + | `Normal -> "xml" + | `Gzipped -> "xml.gz" (* basename = resource name without trailing ".gz", if any *) let is_in_cache basename = Sys.file_exists (match Lazy.force Http_getter_env.cache_mode with - | Enc_normal -> basename - | Enc_gzipped -> basename ^ ".gz") + | `Normal -> basename + | `Gzipped -> basename ^ ".gz") -let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = +let respond_xml + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan + = let resource_type = resource_type_of_url url in let extension = extension_of_resource_type resource_type in let downloadname = @@ -74,7 +85,8 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension | Nuprl_uri baseuri -> (* assumption: baseuri starts with "/" *) - sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri extension + sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri + extension | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri)) | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) -> let escaped_prefix = @@ -89,7 +101,16 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension in let patch_fun = - if patch then Http_getter_common.patch_xml else (fun x -> x) + let xmlbases = + if Http_getter_common.is_theory_uri uri then + Some (Filename.dirname uri, Filename.dirname url) + else + None + in + if patch then + Http_getter_common.patch_xml ?xmlbases ~via_http () + else + (fun x -> x) in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in let contype = "text/xml" in @@ -102,34 +123,40 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = let fill_cache () = threadSafe#doWriter (lazy( if not (is_in_cache basename) then begin (* cache MISS *) - debug_print "Cache MISS :-("; + Http_getter_logger.log ~level:2 "Cache MISS :-("; mkdir ~parents:true (Filename.dirname downloadname); match (resource_type, Lazy.force Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + | `Normal, `Normal | `Gzipped, `Gzipped -> wget ~output:downloadname url; None - | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) + | `Normal, `Gzipped -> (* resource normal, cache gzipped *) let tmp = tempfile () in - wget ~output:tmp url; - gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) - if enc = Enc_normal then (* user wants normal: don't delete it! *) - Some (tmp, enc) - else begin - Sys.remove tmp; - None - end - | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) + let (res, cleanup) = + if enc = `Normal then (* user wants normal: don't delete it! *) + (Some (tmp, enc), (fun () -> ())) + else + (None, (fun () -> Sys.remove tmp)) + in + finally cleanup (lazy ( + wget ~output:tmp url; + gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) + res + )); + | `Gzipped, `Normal -> (* resource gzipped, cache normal *) let tmp = tempfile () in - wget ~output:tmp url; - gunzip ~output:basename ~keep:true tmp; (* fill cache *) - if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *) - Some (tmp, enc) - else begin - Sys.remove tmp; - None - end + let (res, cleanup) = + if enc = `Gzipped then (* user wants .gz: don't delete it! *) + (Some (tmp, enc), (fun () -> ())) + else + (None, (fun () -> Sys.remove tmp)) + in + finally cleanup (lazy ( + wget ~output:tmp url; + gunzip ~output:basename ~keep:true tmp; (* fill cache *) + res + )); end else begin - debug_print "Cache HIT :-)"; + Http_getter_logger.log ~level:2 "Cache HIT :-)"; None end )) in @@ -137,75 +164,85 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = threadSafe#doReader (lazy( assert (is_in_cache basename); match (enc, Lazy.force Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + | `Normal, `Normal | `Gzipped, `Gzipped -> (* resource in cache is already in the required format *) (match enc with - | Enc_normal -> - debug_print "No format mangling required (encoding = normal)"; - return_file ~fname:basename ~contype ~patch_fun outchan - | Enc_gzipped -> - debug_print "No format mangling required (encoding = gzipped)"; + | `Normal -> + Http_getter_logger.log ~level:2 + "No format mangling required (encoding = normal)"; + return_file ~via_http ~fname:basename ~contype ~patch_fun outchan + | `Gzipped -> + Http_getter_logger.log ~level:2 + "No format mangling required (encoding = gzipped)"; return_file - ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" + ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan) - | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> + | `Normal, `Gzipped | `Gzipped, `Normal -> (match tmp_short_circuit with | None -> (* no short circuit possible, use cache *) - debug_print "No short circuit available, use cache"; + Http_getter_logger.log ~level:2 + "No short circuit available, use cache"; let tmp = tempfile () in - (match enc with - | Enc_normal -> - (* required format is normal, cached version is gzipped *) - gunzip (* gunzip to tmp *) - ~output:tmp ~keep:true (basename ^ ".gz"); - return_file ~fname:tmp ~contype ~patch_fun outchan; - | Enc_gzipped -> - (* required format is gzipped, cached version is normal *) - gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) - return_file - ~fname:tmp ~contype ~contenc:"x-gzip" - ~patch_fun ~gunzip:true - outchan); - Sys.remove tmp - | Some (fname, Enc_normal) -> (* short circuit available, use it! *) - debug_print "Using short circuit (encoding = normal)"; - return_file ~fname ~contype ~patch_fun outchan; - Sys.remove fname - | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) - debug_print "Using short circuit (encoding = gzipped)"; - return_file - ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan; - Sys.remove fname) + finally (fun () -> Sys.remove tmp) (lazy ( + (match enc with + | `Normal -> + (* required format is normal, cached version is gzipped *) + gunzip (* gunzip to tmp *) + ~output:tmp ~keep:true (basename ^ ".gz"); + return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan; + | `Gzipped -> + (* required format is gzipped, cached version is normal *) + gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) + return_file + ~via_http ~fname:tmp ~contype ~contenc:"x-gzip" + ~patch_fun ~gunzip:true + outchan) + )) + | Some (fname, `Normal) -> (* short circuit available, use it! *) + Http_getter_logger.log ~level:2 + "Using short circuit (encoding = normal)"; + finally (fun () -> Sys.remove fname) (lazy ( + return_file ~via_http ~fname ~contype ~patch_fun outchan + )) + | Some (fname, `Gzipped) -> (* short circuit available, use it! *) + Http_getter_logger.log ~level:2 + "Using short circuit (encoding = gzipped)"; + finally (fun () -> Sys.remove fname) (lazy ( + return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun + ~gunzip:true outchan + ))) )) -;; (* TODO enc is not yet supported *) -let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = +let respond_xsl + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = let patch_fun = - if patch then Http_getter_common.patch_xsl else (fun x -> x) + if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) in let fname = tempfile () in - wget ~output:fname url; - return_file ~fname ~contype:"text/xml" ~patch_fun outchan; - Sys.remove fname -;; + finally (fun () -> Sys.remove fname) (lazy ( + wget ~output:fname url; + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan + )) (* TODO enc is not yet supported *) -let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan = +let respond_dtd + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = let patch_fun = - if patch then Http_getter_common.patch_dtd else (fun x -> x) + if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x) in if Sys.file_exists url then (* TODO check this: old getter here used text/xml *) - return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan + return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan else - return_html_error ("Can't find DTD: " ^ url) outchan -;; + raise (Dtd_not_found url) let clean () = let module E = Http_getter_env in List.iter (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*"))) [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ] -;; +