2 * Copyright (C) 2003-2004:
3 * Stefano Zacchiroli <zack@cs.unibo.it>
4 * for the HELM Team http://helm.cs.unibo.it/
6 * This file is part of HELM, an Hypertextual, Electronic
7 * Library of Mathematics, developed at the Computer Science
8 * Department, University of Bologna, Italy.
10 * HELM is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * HELM is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with HELM; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * For details, see the HELM World-Wide-Web page,
26 * http://helm.cs.unibo.it/
29 (* TODO cache expires control!!! *)
31 if two large proof (already in cache) are requested at the same time by two
32 clients, uwobo (java implementation, not yet tested with the OCaml one)
33 starts looping sending output to one of the client *)
35 open Http_getter_common
37 open Http_getter_types
40 (* expose ThreadSafe.threadSafe methods *)
43 inherit ThreadSafe.threadSafe
44 method virtual doCritical : 'a. 'a lazy_t -> 'a
45 method virtual doReader : 'a. 'a lazy_t -> 'a
46 method virtual doWriter : 'a. 'a lazy_t -> 'a
49 let threadSafe = new threadSafe
51 let finally cleanup f =
53 let res = Lazy.force f in
58 raise (Http_getter_types.Cache_failure (Printexc.to_string e))
60 let resource_type_of_url = function
61 | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped
62 | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal
63 | url -> raise (Invalid_URL url)
65 let extension_of_resource_type = function
67 | `Gzipped -> "xml.gz"
69 (* basename = resource name without trailing ".gz", if any *)
70 let is_in_cache basename =
72 (match Lazy.force Http_getter_env.cache_mode with
74 | `Gzipped -> basename ^ ".gz")
77 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan
79 let local_part = Http_getter_misc.local_url url in
80 let local_resource = local_part <> None in
81 let resource_type = resource_type_of_url url in
82 let extension = extension_of_resource_type resource_type in
84 match uri_of_string uri with (* parse uri *)
85 | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
86 (* assumption: baseuri starts with "/" *)
87 sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension
88 | Nuprl_uri baseuri ->
89 (* assumption: baseuri starts with "/" *)
90 sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri
92 | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
93 | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
95 (Pcre.replace ~pat:"/" ~templ:"_"
96 (Pcre.replace ~pat:"_" ~templ:"__"
100 | Theory _ -> "//theory:"))))
103 (Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension
107 if Http_getter_common.is_theory_uri uri then
108 Some (Filename.dirname uri, Filename.dirname url)
113 Some (Http_getter_common.patch_xml ?xmlbases ~via_http ())
117 let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
118 let contype = "text/xml" in
119 (* Fill cache if needed and return a short circuit file.
120 Short circuit is needed in situations like:
121 resource type = normal, cache type = gzipped, required encoding = normal
122 we would like to avoid normal -> gzipped -> normal conversions. To avoid
123 this tmp_short_circuit is used to remember the name of the intermediate
126 threadSafe#doWriter (lazy(
127 if local_resource then begin (* resource available via file system *)
128 Http_getter_logger.log ~level:2 "Local resource, avoid caching";
130 end else if not (is_in_cache basename) then begin (* cache miss *)
131 Http_getter_logger.log ~level:2 "Cache MISS :-(";
132 mkdir ~parents:true (Filename.dirname downloadname);
133 match (resource_type, Lazy.force Http_getter_env.cache_mode) with
134 | `Normal, `Normal | `Gzipped, `Gzipped ->
135 wget ~output:downloadname url;
137 | `Normal, `Gzipped -> (* resource normal, cache gzipped *)
138 let tmp = tempfile () in
140 if enc = `Normal then (* user wants normal: don't delete it! *)
141 (Some (tmp, enc), (fun () -> ()))
143 (None, (fun () -> Sys.remove tmp))
145 finally cleanup (lazy (
146 wget ~output:tmp url;
147 gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
150 | `Gzipped, `Normal -> (* resource gzipped, cache normal *)
151 let tmp = tempfile () in
153 if enc = `Gzipped then (* user wants .gz: don't delete it! *)
154 (Some (tmp, enc), (fun () -> ()))
156 (None, (fun () -> Sys.remove tmp))
158 finally cleanup (lazy (
159 wget ~output:tmp url;
160 gunzip ~output:basename ~keep:true tmp; (* fill cache *)
163 end else begin (* cache hit *)
164 Http_getter_logger.log ~level:2 "Cache HIT :-)";
168 let tmp_short_circuit = fill_cache () in
169 threadSafe#doReader (lazy(
170 assert (local_resource || is_in_cache basename);
171 let basename = match local_part with Some f -> f | None -> basename in
173 if local_resource then
176 Lazy.force Http_getter_env.cache_mode
178 match (enc, resource_type) with
179 | `Normal, `Normal | `Gzipped, `Gzipped ->
180 (* resource (in cache or local) is already in the required format *)
183 Http_getter_logger.log ~level:2
184 "No format mangling required (encoding = normal)";
185 return_file ~via_http ~fname:basename ~contype ?patch_fun ~enc
188 Http_getter_logger.log ~level:2
189 "No format mangling required (encoding = gzipped)";
191 ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
192 ?patch_fun ~gunzip:true ~enc
194 | `Normal, `Gzipped | `Gzipped, `Normal ->
195 (match tmp_short_circuit with
196 | None -> (* no short circuit possible, use cache *)
197 Http_getter_logger.log ~level:2
198 "No short circuit available, use cache (or local resource)";
199 let tmp = tempfile () in
200 finally (fun () -> Sys.remove tmp) (lazy (
203 (* required format normal, cached (or local) version gzipped *)
204 gunzip (* gunzip to tmp *)
205 ~output:tmp ~keep:true (basename ^ ".gz");
206 return_file ~via_http ~fname:tmp ~contype ?patch_fun ~enc outchan
208 (* required format is gzipped, cached version is normal *)
209 gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
211 ~via_http ~fname:tmp ~contype ~contenc:"x-gzip"
212 ?patch_fun ~gunzip:true ~enc
215 | Some (fname, `Normal) -> (* short circuit available, use it! *)
216 Http_getter_logger.log ~level:2
217 "Using short circuit (encoding = normal)";
218 finally (fun () -> Sys.remove fname) (lazy (
219 return_file ~via_http ~fname ~contype ?patch_fun ~enc outchan
221 | Some (fname, `Gzipped) -> (* short circuit available, use it! *)
222 Http_getter_logger.log ~level:2
223 "Using short circuit (encoding = gzipped)";
224 finally (fun () -> Sys.remove fname) (lazy (
225 return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ?patch_fun
226 ~gunzip:true ~enc outchan
230 (* TODO enc is not yet supported *)
232 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
235 if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
237 let fname = tempfile () in
238 finally (fun () -> Sys.remove fname) (lazy (
239 wget ~output:fname url;
240 return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan
243 (* TODO enc is not yet supported *)
245 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
248 if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x)
250 if Sys.file_exists url then
251 (* TODO check this: old getter here used text/xml *)
252 return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun ~enc
255 raise (Dtd_not_found url)
258 let module E = Http_getter_env in
260 (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")))
261 [ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ]