+++ /dev/null
-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-(* TODO cache expires control!!! *)
-(* TODO uwobo loop:
- if two large proof (already in cache) are requested at the same time by two
- clients, uwobo (java implementation, not yet tested with the OCaml one)
- starts looping sending output to one of the client *)
-
-open Http_getter_common
-open Http_getter_misc
-open Http_getter_types
-open Printf
-
- (* expose ThreadSafe.threadSafe methods *)
-class threadSafe =
- object
- inherit ThreadSafe.threadSafe
- method virtual doCritical : 'a. 'a lazy_t -> 'a
- method virtual doReader : 'a. 'a lazy_t -> 'a
- method virtual doWriter : 'a. 'a lazy_t -> 'a
- end
-
-let threadSafe = new threadSafe
-
-let finally cleanup f =
- try
- let res = Lazy.force f in
- cleanup ();
- res
- with e ->
- cleanup ();
- raise (Http_getter_types.Cache_failure (Printexc.to_string e))
-
-let resource_type_of_url = function
- | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped
- | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal
- | url -> raise (Invalid_URL url)
-
-let extension_of_resource_type = function
- | `Normal -> "xml"
- | `Gzipped -> "xml.gz"
-
- (* basename = resource name without trailing ".gz", if any *)
-let is_in_cache basename =
- Sys.file_exists
- (match Lazy.force Http_getter_env.cache_mode with
- | `Normal -> basename
- | `Gzipped -> basename ^ ".gz")
-
-let respond_xml
- ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan
- =
- let resource_type = resource_type_of_url url in
- let extension = extension_of_resource_type resource_type in
- let downloadname =
- match uri_of_string uri with (* parse uri *)
- | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
- (* assumption: baseuri starts with "/" *)
- sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension
- | Nuprl_uri baseuri ->
- (* assumption: baseuri starts with "/" *)
- sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri
- extension
- | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
- | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
- let escaped_prefix =
- (Pcre.replace ~pat:"/" ~templ:"_"
- (Pcre.replace ~pat:"_" ~templ:"__"
- (prefix ^
- (match qbaseuri with
- | Cic _ -> "//cic:"
- | Theory _ -> "//theory:"))))
- in
- sprintf "%s/%s%s.%s"
- (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension
- in
- let patch_fun =
- let xmlbases =
- if Http_getter_common.is_theory_uri uri then
- Some (Filename.dirname uri, Filename.dirname url)
- else
- None
- in
- if patch then
- Http_getter_common.patch_xml ?xmlbases ~via_http ()
- else
- (fun x -> x)
- in
- let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
- let contype = "text/xml" in
- (* Fill cache if needed and return a short circuit file.
- Short circuit is needed in situations like:
- resource type = normal, cache type = gzipped, required encoding = normal
- we would like to avoid normal -> gzipped -> normal conversions. To avoid
- this tmp_short_circuit is used to remember the name of the intermediate
- file name *)
- let fill_cache () =
- threadSafe#doWriter (lazy(
- if not (is_in_cache basename) then begin (* cache MISS *)
- Http_getter_logger.log ~level:2 "Cache MISS :-(";
- mkdir ~parents:true (Filename.dirname downloadname);
- match (resource_type, Lazy.force Http_getter_env.cache_mode) with
- | `Normal, `Normal | `Gzipped, `Gzipped ->
- wget ~output:downloadname url;
- None
- | `Normal, `Gzipped -> (* resource normal, cache gzipped *)
- let tmp = tempfile () in
- let (res, cleanup) =
- if enc = `Normal then (* user wants normal: don't delete it! *)
- (Some (tmp, enc), (fun () -> ()))
- else
- (None, (fun () -> Sys.remove tmp))
- in
- finally cleanup (lazy (
- wget ~output:tmp url;
- gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
- res
- ));
- | `Gzipped, `Normal -> (* resource gzipped, cache normal *)
- let tmp = tempfile () in
- let (res, cleanup) =
- if enc = `Gzipped then (* user wants .gz: don't delete it! *)
- (Some (tmp, enc), (fun () -> ()))
- else
- (None, (fun () -> Sys.remove tmp))
- in
- finally cleanup (lazy (
- wget ~output:tmp url;
- gunzip ~output:basename ~keep:true tmp; (* fill cache *)
- res
- ));
- end else begin
- Http_getter_logger.log ~level:2 "Cache HIT :-)";
- None
- end
- )) in
- let tmp_short_circuit = fill_cache () in
- threadSafe#doReader (lazy(
- assert (is_in_cache basename);
- match (enc, Lazy.force Http_getter_env.cache_mode) with
- | `Normal, `Normal | `Gzipped, `Gzipped ->
- (* resource in cache is already in the required format *)
- (match enc with
- | `Normal ->
- Http_getter_logger.log ~level:2
- "No format mangling required (encoding = normal)";
- return_file ~via_http ~fname:basename ~contype ~patch_fun outchan
- | `Gzipped ->
- Http_getter_logger.log ~level:2
- "No format mangling required (encoding = gzipped)";
- return_file
- ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
- outchan)
- | `Normal, `Gzipped | `Gzipped, `Normal ->
- (match tmp_short_circuit with
- | None -> (* no short circuit possible, use cache *)
- Http_getter_logger.log ~level:2
- "No short circuit available, use cache";
- let tmp = tempfile () in
- finally (fun () -> Sys.remove tmp) (lazy (
- (match enc with
- | `Normal ->
- (* required format is normal, cached version is gzipped *)
- gunzip (* gunzip to tmp *)
- ~output:tmp ~keep:true (basename ^ ".gz");
- return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan;
- | `Gzipped ->
- (* required format is gzipped, cached version is normal *)
- gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
- return_file
- ~via_http ~fname:tmp ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
- outchan)
- ))
- | Some (fname, `Normal) -> (* short circuit available, use it! *)
- Http_getter_logger.log ~level:2
- "Using short circuit (encoding = normal)";
- finally (fun () -> Sys.remove fname) (lazy (
- return_file ~via_http ~fname ~contype ~patch_fun outchan
- ))
- | Some (fname, `Gzipped) -> (* short circuit available, use it! *)
- Http_getter_logger.log ~level:2
- "Using short circuit (encoding = gzipped)";
- finally (fun () -> Sys.remove fname) (lazy (
- return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun
- ~gunzip:true outchan
- )))
- ))
-
- (* TODO enc is not yet supported *)
-let respond_xsl
- ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
- =
- let patch_fun =
- if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
- in
- let fname = tempfile () in
- finally (fun () -> Sys.remove fname) (lazy (
- wget ~output:fname url;
- return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan
- ))
-
- (* TODO enc is not yet supported *)
-let respond_dtd
- ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
- =
- let patch_fun =
- if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x)
- in
- if Sys.file_exists url then
- (* TODO check this: old getter here used text/xml *)
- return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan
- else
- raise (Dtd_not_found url)
-
-let clean () =
- let module E = Http_getter_env in
- List.iter
- (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")))
- [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ]
-