2 * Copyright (C) 2003-2004:
3 * Stefano Zacchiroli <zack@cs.unibo.it>
4 * for the HELM Team http://helm.cs.unibo.it/
6 * This file is part of HELM, an Hypertextual, Electronic
7 * Library of Mathematics, developed at the Computer Science
8 * Department, University of Bologna, Italy.
10 * HELM is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * HELM is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with HELM; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * For details, see the HELM World-Wide-Web page,
26 * http://helm.cs.unibo.it/
29 (* TODO cache expires control!!! *)
31 if two large proof (already in cache) are requested at the same time by two
32 clients, uwobo (java implementation, not yet tested with the OCaml one)
33 starts looping sending output to one of the client *)
35 open Http_getter_common;;
36 open Http_getter_debugger;;
37 open Http_getter_misc;;
38 open Http_getter_types;;
41 (* expose ThreadSafe.threadSafe methods *)
44 inherit ThreadSafe.threadSafe
45 method virtual doCritical : 'a. 'a lazy_t -> 'a
46 method virtual doReader : 'a. 'a lazy_t -> 'a
47 method virtual doWriter : 'a. 'a lazy_t -> 'a
50 let threadSafe = new threadSafe
52 let finally cleanup f =
54 let res = Lazy.force f in
59 raise (Http_getter_types.Cache_failure (Printexc.to_string e))
61 let resource_type_of_url = function
62 | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped
63 | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal
64 | url -> raise (Invalid_URL url)
66 let extension_of_resource_type = function
68 | `Gzipped -> "xml.gz"
70 (* basename = resource name without trailing ".gz", if any *)
71 let is_in_cache basename =
73 (match Lazy.force Http_getter_env.cache_mode with
75 | `Gzipped -> basename ^ ".gz")
78 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan
80 let resource_type = resource_type_of_url url in
81 let extension = extension_of_resource_type resource_type in
83 match uri_of_string uri with (* parse uri *)
84 | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
85 (* assumption: baseuri starts with "/" *)
86 sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension
87 | Nuprl_uri baseuri ->
88 (* assumption: baseuri starts with "/" *)
89 sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri
91 | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
92 | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
94 (Pcre.replace ~pat:"/" ~templ:"_"
95 (Pcre.replace ~pat:"_" ~templ:"__"
99 | Theory _ -> "//theory:"))))
102 (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension
105 if patch then Http_getter_common.patch_xml ~via_http () else (fun x -> x)
107 let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
108 let contype = "text/xml" in
109 (* Fill cache if needed and return a short circuit file.
110 Short circuit is needed in situations like:
111 resource type = normal, cache type = gzipped, required encoding = normal
112 we would like to avoid normal -> gzipped -> normal conversions. To avoid
113 this tmp_short_circuit is used to remember the name of the intermediate
116 threadSafe#doWriter (lazy(
117 if not (is_in_cache basename) then begin (* cache MISS *)
118 debug_print "Cache MISS :-(";
119 mkdir ~parents:true (Filename.dirname downloadname);
120 match (resource_type, Lazy.force Http_getter_env.cache_mode) with
121 | `Normal, `Normal | `Gzipped, `Gzipped ->
122 wget ~output:downloadname url;
124 | `Normal, `Gzipped -> (* resource normal, cache gzipped *)
125 let tmp = tempfile () in
127 if enc = `Normal then (* user wants normal: don't delete it! *)
128 (Some (tmp, enc), (fun () -> ()))
130 (None, (fun () -> Sys.remove tmp))
132 finally cleanup (lazy (
133 wget ~output:tmp url;
134 gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
137 | `Gzipped, `Normal -> (* resource gzipped, cache normal *)
138 let tmp = tempfile () in
140 if enc = `Gzipped then (* user wants .gz: don't delete it! *)
141 (Some (tmp, enc), (fun () -> ()))
143 (None, (fun () -> Sys.remove tmp))
145 finally cleanup (lazy (
146 wget ~output:tmp url;
147 gunzip ~output:basename ~keep:true tmp; (* fill cache *)
151 debug_print "Cache HIT :-)";
155 let tmp_short_circuit = fill_cache () in
156 threadSafe#doReader (lazy(
157 assert (is_in_cache basename);
158 match (enc, Lazy.force Http_getter_env.cache_mode) with
159 | `Normal, `Normal | `Gzipped, `Gzipped ->
160 (* resource in cache is already in the required format *)
163 debug_print "No format mangling required (encoding = normal)";
164 return_file ~via_http ~fname:basename ~contype ~patch_fun outchan
166 debug_print "No format mangling required (encoding = gzipped)";
168 ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
169 ~patch_fun ~gunzip:true
171 | `Normal, `Gzipped | `Gzipped, `Normal ->
172 (match tmp_short_circuit with
173 | None -> (* no short circuit possible, use cache *)
174 debug_print "No short circuit available, use cache";
175 let tmp = tempfile () in
176 finally (fun () -> Sys.remove tmp) (lazy (
179 (* required format is normal, cached version is gzipped *)
180 gunzip (* gunzip to tmp *)
181 ~output:tmp ~keep:true (basename ^ ".gz");
182 return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan;
184 (* required format is gzipped, cached version is normal *)
185 gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
187 ~via_http ~fname:tmp ~contype ~contenc:"x-gzip"
188 ~patch_fun ~gunzip:true
191 | Some (fname, `Normal) -> (* short circuit available, use it! *)
192 debug_print "Using short circuit (encoding = normal)";
193 finally (fun () -> Sys.remove fname) (lazy (
194 return_file ~via_http ~fname ~contype ~patch_fun outchan
196 | Some (fname, `Gzipped) -> (* short circuit available, use it! *)
197 debug_print "Using short circuit (encoding = gzipped)";
198 finally (fun () -> Sys.remove fname) (lazy (
199 return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun
204 (* TODO enc is not yet supported *)
206 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
209 if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
211 let fname = tempfile () in
212 finally (fun () -> Sys.remove fname) (lazy (
213 wget ~output:fname url;
214 return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan
217 (* TODO enc is not yet supported *)
219 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
222 if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x)
224 if Sys.file_exists url then
225 (* TODO check this: old getter here used text/xml *)
226 return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan
228 raise (Dtd_not_found url)
231 let module E = Http_getter_env in
233 (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")))
234 [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ]