(* * Copyright (C) 2003-2004: * Stefano Zacchiroli * for the HELM Team http://helm.cs.unibo.it/ * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * HELM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HELM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, * http://helm.cs.unibo.it/ *) (* TODO cache expires control!!! *) (* TODO uwobo loop: if two large proof (already in cache) are requested at the same time by two clients, uwobo (java implementation, not yet tested with the OCaml one) starts looping sending output to one of the client *) open Http_getter_common;; open Http_getter_debugger;; open Http_getter_misc;; open Http_getter_types;; open Printf;; (* expose ThreadSafe.threadSafe methods *) class threadSafe = object inherit ThreadSafe.threadSafe method virtual doCritical : 'a. 'a lazy_t -> 'a method virtual doReader : 'a. 'a lazy_t -> 'a method virtual doWriter : 'a. 'a lazy_t -> 'a end ;; let threadSafe = new threadSafe ;; let resource_type_of_url = function | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal | url -> raise (Invalid_URL url) let extension_of_resource_type = function | Enc_normal -> "xml" | Enc_gzipped -> "xml.gz" (* basename = resource name without trailing ".gz", if any *) let is_in_cache basename = Sys.file_exists (match Lazy.force Http_getter_env.cache_mode with | Enc_normal -> basename | Enc_gzipped -> basename ^ ".gz") let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan = let resource_type = resource_type_of_url url in let extension = extension_of_resource_type resource_type in let downloadname = match uri_of_string uri with (* parse uri *) | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) -> (* assumption: baseuri starts with "/" *) sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension | Nuprl_uri baseuri -> (* assumption: baseuri starts with "/" *) sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri extension | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri)) | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) -> let escaped_prefix = (Pcre.replace ~pat:"/" ~templ:"_" (Pcre.replace ~pat:"_" ~templ:"__" (prefix ^ (match qbaseuri with | Cic _ -> "//cic:" | Theory _ -> "//theory:")))) in sprintf "%s/%s%s.%s" (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension in let patch_fun = if patch then Http_getter_common.patch_xml else (fun x -> x) in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in let contype = "text/xml" in (* Fill cache if needed and return a short circuit file. Short circuit is needed in situations like: resource type = normal, cache type = gzipped, required encoding = normal we would like to avoid normal -> gzipped -> normal conversions. To avoid this tmp_short_circuit is used to remember the name of the intermediate file name *) let fill_cache () = threadSafe#doWriter (lazy( if not (is_in_cache basename) then begin (* cache MISS *) debug_print "Cache MISS :-("; mkdir ~parents:true (Filename.dirname downloadname); match (resource_type, Lazy.force Http_getter_env.cache_mode) with | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> wget ~output:downloadname url; None | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *) let tmp = tempfile () in wget ~output:tmp url; gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *) if enc = Enc_normal then (* user wants normal: don't delete it! *) Some (tmp, enc) else begin Sys.remove tmp; None end | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *) let tmp = tempfile () in wget ~output:tmp url; gunzip ~output:basename ~keep:true tmp; (* fill cache *) if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *) Some (tmp, enc) else begin Sys.remove tmp; None end end else begin debug_print "Cache HIT :-)"; None end )) in let tmp_short_circuit = fill_cache () in threadSafe#doReader (lazy( assert (is_in_cache basename); match (enc, Lazy.force Http_getter_env.cache_mode) with | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> (* resource in cache is already in the required format *) (match enc with | Enc_normal -> debug_print "No format mangling required (encoding = normal)"; return_file ~fname:basename ~contype ~patch_fun outchan | Enc_gzipped -> debug_print "No format mangling required (encoding = gzipped)"; return_file ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan) | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal -> (match tmp_short_circuit with | None -> (* no short circuit possible, use cache *) debug_print "No short circuit available, use cache"; let tmp = tempfile () in (match enc with | Enc_normal -> (* required format is normal, cached version is gzipped *) gunzip (* gunzip to tmp *) ~output:tmp ~keep:true (basename ^ ".gz"); return_file ~fname:tmp ~contype ~patch_fun outchan; | Enc_gzipped -> (* required format is gzipped, cached version is normal *) gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) return_file ~fname:tmp ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan); Sys.remove tmp | Some (fname, Enc_normal) -> (* short circuit available, use it! *) debug_print "Using short circuit (encoding = normal)"; return_file ~fname ~contype ~patch_fun outchan; Sys.remove fname | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *) debug_print "Using short circuit (encoding = gzipped)"; return_file ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan; Sys.remove fname) )) ;; (* TODO enc is not yet supported *) let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan = let patch_fun = if patch then Http_getter_common.patch_xsl else (fun x -> x) in let fname = tempfile () in wget ~output:fname url; return_file ~fname ~contype:"text/xml" ~patch_fun outchan; Sys.remove fname ;; (* TODO enc is not yet supported *) let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan = let patch_fun = if patch then Http_getter_common.patch_dtd else (fun x -> x) in if Sys.file_exists url then (* TODO check this: old getter here used text/xml *) return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan else return_html_error ("Can't find DTD: " ^ url) outchan ;; let clean () = let module E = Http_getter_env in List.iter (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*"))) [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ] ;;