2 * Copyright (C) 2003-2004:
3 * Stefano Zacchiroli <zack@cs.unibo.it>
4 * for the HELM Team http://helm.cs.unibo.it/
6 * This file is part of HELM, an Hypertextual, Electronic
7 * Library of Mathematics, developed at the Computer Science
8 * Department, University of Bologna, Italy.
10 * HELM is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * HELM is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with HELM; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * For details, see the HELM World-Wide-Web page,
26 * http://helm.cs.unibo.it/
29 (* TODO cache expires control!!! *)
31 if two large proof (already in cache) are requested at the same time by two
32 clients, uwobo (java implementation, not yet tested with the OCaml one)
33 starts looping sending output to one of the client *)
35 open Http_getter_common
37 open Http_getter_types
40 (* expose ThreadSafe.threadSafe methods *)
43 inherit ThreadSafe.threadSafe
44 method virtual doCritical : 'a. 'a lazy_t -> 'a
45 method virtual doReader : 'a. 'a lazy_t -> 'a
46 method virtual doWriter : 'a. 'a lazy_t -> 'a
49 let threadSafe = new threadSafe
51 let finally cleanup f =
53 let res = Lazy.force f in
58 raise (Http_getter_types.Cache_failure (Printexc.to_string e))
60 let resource_type_of_url = function
61 | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped
62 | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal
63 | url -> raise (Invalid_URL url)
65 let extension_of_resource_type = function
67 | `Gzipped -> "xml.gz"
69 (* basename = resource name without trailing ".gz", if any *)
70 let is_in_cache basename =
72 (match Lazy.force Http_getter_env.cache_mode with
74 | `Gzipped -> basename ^ ".gz")
77 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan
79 let resource_type = resource_type_of_url url in
80 let extension = extension_of_resource_type resource_type in
82 match uri_of_string uri with (* parse uri *)
83 | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
84 (* assumption: baseuri starts with "/" *)
85 sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension
86 | Nuprl_uri baseuri ->
87 (* assumption: baseuri starts with "/" *)
88 sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri
90 | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
91 | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
93 (Pcre.replace ~pat:"/" ~templ:"_"
94 (Pcre.replace ~pat:"_" ~templ:"__"
98 | Theory _ -> "//theory:"))))
101 (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension
105 Http_getter_common.patch_xml
106 ~xmlbases:(Filename.dirname uri, Filename.dirname url) ~via_http ()
110 let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
111 let contype = "text/xml" in
112 (* Fill cache if needed and return a short circuit file.
113 Short circuit is needed in situations like:
114 resource type = normal, cache type = gzipped, required encoding = normal
115 we would like to avoid normal -> gzipped -> normal conversions. To avoid
116 this tmp_short_circuit is used to remember the name of the intermediate
119 threadSafe#doWriter (lazy(
120 if not (is_in_cache basename) then begin (* cache MISS *)
121 Http_getter_logger.log ~level:2 "Cache MISS :-(";
122 mkdir ~parents:true (Filename.dirname downloadname);
123 match (resource_type, Lazy.force Http_getter_env.cache_mode) with
124 | `Normal, `Normal | `Gzipped, `Gzipped ->
125 wget ~output:downloadname url;
127 | `Normal, `Gzipped -> (* resource normal, cache gzipped *)
128 let tmp = tempfile () in
130 if enc = `Normal then (* user wants normal: don't delete it! *)
131 (Some (tmp, enc), (fun () -> ()))
133 (None, (fun () -> Sys.remove tmp))
135 finally cleanup (lazy (
136 wget ~output:tmp url;
137 gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
140 | `Gzipped, `Normal -> (* resource gzipped, cache normal *)
141 let tmp = tempfile () in
143 if enc = `Gzipped then (* user wants .gz: don't delete it! *)
144 (Some (tmp, enc), (fun () -> ()))
146 (None, (fun () -> Sys.remove tmp))
148 finally cleanup (lazy (
149 wget ~output:tmp url;
150 gunzip ~output:basename ~keep:true tmp; (* fill cache *)
154 Http_getter_logger.log ~level:2 "Cache HIT :-)";
158 let tmp_short_circuit = fill_cache () in
159 threadSafe#doReader (lazy(
160 assert (is_in_cache basename);
161 match (enc, Lazy.force Http_getter_env.cache_mode) with
162 | `Normal, `Normal | `Gzipped, `Gzipped ->
163 (* resource in cache is already in the required format *)
166 Http_getter_logger.log ~level:2
167 "No format mangling required (encoding = normal)";
168 return_file ~via_http ~fname:basename ~contype ~patch_fun outchan
170 Http_getter_logger.log ~level:2
171 "No format mangling required (encoding = gzipped)";
173 ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
174 ~patch_fun ~gunzip:true
176 | `Normal, `Gzipped | `Gzipped, `Normal ->
177 (match tmp_short_circuit with
178 | None -> (* no short circuit possible, use cache *)
179 Http_getter_logger.log ~level:2
180 "No short circuit available, use cache";
181 let tmp = tempfile () in
182 finally (fun () -> Sys.remove tmp) (lazy (
185 (* required format is normal, cached version is gzipped *)
186 gunzip (* gunzip to tmp *)
187 ~output:tmp ~keep:true (basename ^ ".gz");
188 return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan;
190 (* required format is gzipped, cached version is normal *)
191 gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
193 ~via_http ~fname:tmp ~contype ~contenc:"x-gzip"
194 ~patch_fun ~gunzip:true
197 | Some (fname, `Normal) -> (* short circuit available, use it! *)
198 Http_getter_logger.log ~level:2
199 "Using short circuit (encoding = normal)";
200 finally (fun () -> Sys.remove fname) (lazy (
201 return_file ~via_http ~fname ~contype ~patch_fun outchan
203 | Some (fname, `Gzipped) -> (* short circuit available, use it! *)
204 Http_getter_logger.log ~level:2
205 "Using short circuit (encoding = gzipped)";
206 finally (fun () -> Sys.remove fname) (lazy (
207 return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun
212 (* TODO enc is not yet supported *)
214 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
217 if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
219 let fname = tempfile () in
220 finally (fun () -> Sys.remove fname) (lazy (
221 wget ~output:fname url;
222 return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan
225 (* TODO enc is not yet supported *)
227 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
230 if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x)
232 if Sys.file_exists url then
233 (* TODO check this: old getter here used text/xml *)
234 return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan
236 raise (Dtd_not_found url)
239 let module E = Http_getter_env in
241 (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")))
242 [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ]