X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter_cache.ml;h=3cc9e40546fb4e05ce2b0fec9d45da27740a7724;hb=1c7fb836e2af4f2f3d18afd0396701f2094265ff;hp=8d180e1f86a18c52eeda7adac40a614583487947;hpb=15fbff73f74c07183edcc9618312e129b0835dc6;p=helm.git diff --git a/helm/http_getter/http_getter_cache.ml b/helm/http_getter/http_getter_cache.ml index 8d180e1f8..3cc9e4054 100644 --- a/helm/http_getter/http_getter_cache.ml +++ b/helm/http_getter/http_getter_cache.ml @@ -1,5 +1,7 @@ (* - * Copyright (C) 2003, HELM Team. + * Copyright (C) 2003: + * Stefano Zacchiroli + * for the HELM Team http://helm.cs.unibo.it/ * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science @@ -21,14 +23,32 @@ * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, - * http://cs.unibo.it/helm/. + * http://helm.cs.unibo.it/ *) +(* TODO cache expires control!!! *) +(* TODO uwobo loop: + if two large proof (already in cache) are requested at the same time by two + clients, uwobo (java implementation, not yet tested with the OCaml one) + starts looping sending output to one of the client *) + open Http_getter_common;; +open Http_getter_debugger;; open Http_getter_misc;; open Http_getter_types;; open Printf;; + (* expose ThreadSafe.threadSafe methods *) +class threadSafe = + object + inherit ThreadSafe.threadSafe + method virtual doCritical : 'a. 'a lazy_t -> 'a + method virtual doReader : 'a. 'a lazy_t -> 'a + method virtual doWriter : 'a. 'a lazy_t -> 'a + end +;; +let threadSafe = new threadSafe ;; + let resource_type_of_url = function | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal @@ -49,9 +69,12 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = let extension = extension_of_resource_type resource_type in let downloadname = match http_getter_uri_of_string uri with (* parse uri *) - | Xml_uri (Cic baseuri) | Xml_uri (Theory baseuri) -> + | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) -> + (* assumption: baseuri starts with "/" *) + sprintf "%s%s.%s" Http_getter_env.cic_dir baseuri extension + | Nuprl_uri baseuri -> (* assumption: baseuri starts with "/" *) - sprintf "%s%s.%s" Http_getter_env.xml_dir baseuri extension + sprintf "%s%s.%s" Http_getter_env.nuprl_dir baseuri extension | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri)) | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) -> let escaped_prefix = @@ -69,35 +92,93 @@ let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = if patch then Http_getter_common.patch_xml else (fun x -> x) in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in - if not (is_in_cache basename) then begin (* download and fill cache *) - mkdir ~parents:true (Filename.dirname downloadname); - wget ~output:downloadname url; - match (resource_type, Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal -> - (if enc = Enc_gzipped then gzip ~keep:true downloadname) - | Enc_gzipped, Enc_gzipped -> - (if enc = Enc_normal then gunzip ~keep:true downloadname) - | Enc_normal, Enc_gzipped -> gzip ~keep:(enc = Enc_normal) downloadname - | Enc_gzipped, Enc_normal -> gunzip ~keep:(enc = Enc_gzipped) downloadname - end else begin (* resource already in cache *) + let contype = "text/xml" in + (* File cache if needed and return a short circuit file. + Short circuit is needed in situation like: + resource type = normal, cache type = gzipped, required encoding = normal + we would like to avoid normal -> gzipped -> normal conversions. To avoid + this tmp_short_circuit is used to remember the name of the intermediate + file name *) + let fill_cache () = + threadSafe#doWriter (lazy( + if not (is_in_cache basename) then begin (* cache MISS *) + debug_print "Cache MISS :-("; + mkdir ~parents:true (Filename.dirname downloadname); + match (resource_type, Http_getter_env.cache_mode) with + | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + wget ~output:downloadname url; + None + | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) + let tmp = tempfile () in + wget ~output:tmp url; + gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) + if enc = Enc_normal then (* user wants normal: don't delete it! *) + Some (tmp, enc) + else begin + Sys.remove tmp; + None + end + | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) + let tmp = tempfile () in + wget ~output:tmp url; + gunzip ~output:basename ~keep:true tmp; (* fill cache *) + if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *) + Some (tmp, enc) + else begin + Sys.remove tmp; + None + end + end else begin + debug_print "Cache HIT :-)"; + None + end + )) in + let tmp_short_circuit = fill_cache () in + threadSafe#doReader (lazy( + assert (is_in_cache basename); match (enc, Http_getter_env.cache_mode) with - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> () - | Enc_normal, Enc_gzipped -> gunzip ~keep:true (basename ^ ".gz") - | Enc_gzipped, Enc_normal -> gzip ~keep:true basename - end; (* now resource is in cache *) - (* invariant: file to be sent back to client is available on disk in the - format the client likes *) - (match enc with (* send file to client *) - | Enc_normal -> - return_file ~fname:basename ~contype:"text/xml" ~patch_fun outchan - | Enc_gzipped -> - return_file - ~fname:(basename ^ ".gz") ~contype:"text/xml" ~contenc:"x-gzip" - ~patch_fun outchan); - match (enc, Http_getter_env.cache_mode) with (* remove temp files *) - | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> () - | Enc_normal, Enc_gzipped -> Sys.remove basename - | Enc_gzipped, Enc_normal -> Sys.remove (basename ^ ".gz") + | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> + (* resource in cache is already in the required format *) + (match enc with + | Enc_normal -> + debug_print "No format mangling required (encoding = normal)"; + return_file ~fname:basename ~contype ~patch_fun outchan + | Enc_gzipped -> + debug_print "No format mangling required (encoding = gzipped)"; + return_file + ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" + ~patch_fun ~gunzip:true + outchan) + | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> + (match tmp_short_circuit with + | None -> (* no short circuit possible, use cache *) + debug_print "No short circuit available, use cache"; + let tmp = tempfile () in + (match enc with + | Enc_normal -> + (* required format is normal, cached version is gzipped *) + gunzip (* gunzip to tmp *) + ~output:tmp ~keep:true (basename ^ ".gz"); + return_file ~fname:tmp ~contype ~patch_fun outchan; + | Enc_gzipped -> + (* required format is gzipped, cached version is normal *) + gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) + return_file + ~fname:tmp ~contype ~contenc:"x-gzip" + ~patch_fun ~gunzip:true + outchan); + Sys.remove tmp + | Some (fname, Enc_normal) -> (* short circuit available, use it! *) + debug_print "Using short circuit (encoding = normal)"; + return_file ~fname ~contype ~patch_fun outchan; + Sys.remove fname + | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) + debug_print "Using short circuit (encoding = gzipped)"; + return_file + ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan; + Sys.remove fname) + )) +;; (* TODO enc is not yet supported *) let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = @@ -108,6 +189,7 @@ let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = wget ~output:fname url; return_file ~fname ~contype:"text/xml" ~patch_fun outchan; Sys.remove fname +;; (* TODO enc is not yet supported *) let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan =