3 * Stefano Zacchiroli <zack@cs.unibo.it>
4 * for the HELM Team http://helm.cs.unibo.it/
6 * This file is part of HELM, an Hypertextual, Electronic
7 * Library of Mathematics, developed at the Computer Science
8 * Department, University of Bologna, Italy.
10 * HELM is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * HELM is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with HELM; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * For details, see the HELM World-Wide-Web page,
26 * http://helm.cs.unibo.it/
29 (* TODO cache expires control!!! *)
31 if two large proof (already in cache) are requested at the same time by two
32 clients, uwobo (java implementation, not yet tested with the OCaml one)
33 starts looping sending output to one of the client *)
35 open Http_getter_common;;
36 open Http_getter_debugger;;
37 open Http_getter_misc;;
38 open Http_getter_types;;
41 (* expose ThreadSafe.threadSafe methods *)
44 inherit ThreadSafe.threadSafe
45 method virtual doCritical : 'a. 'a lazy_t -> 'a
46 method virtual doReader : 'a. 'a lazy_t -> 'a
47 method virtual doWriter : 'a. 'a lazy_t -> 'a
50 let threadSafe = new threadSafe ;;
52 let resource_type_of_url = function
53 | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped
54 | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal
55 | url -> raise (Http_getter_invalid_URL url)
56 let extension_of_resource_type = function
58 | Enc_gzipped -> "xml.gz"
60 (* basename = resource name without trailing ".gz", if any *)
61 let is_in_cache basename =
63 (match Http_getter_env.cache_mode with
64 | Enc_normal -> basename
65 | Enc_gzipped -> basename ^ ".gz")
67 let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan =
68 let resource_type = resource_type_of_url url in
69 let extension = extension_of_resource_type resource_type in
71 match http_getter_uri_of_string uri with (* parse uri *)
72 | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
73 (* assumption: baseuri starts with "/" *)
74 sprintf "%s%s.%s" Http_getter_env.cic_dir baseuri extension
75 | Nuprl_uri baseuri ->
76 (* assumption: baseuri starts with "/" *)
77 sprintf "%s%s.%s" Http_getter_env.nuprl_dir baseuri extension
78 | Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
79 | Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
81 (Pcre.replace ~pat:"/" ~templ:"_"
82 (Pcre.replace ~pat:"_" ~templ:"__"
86 | Theory _ -> "//theory:"))))
89 Http_getter_env.rdf_dir escaped_prefix baseuri extension
92 if patch then Http_getter_common.patch_xml else (fun x -> x)
94 let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
95 let contype = "text/xml" in
96 (* File cache if needed and return a short circuit file.
97 Short circuit is needed in situation like:
98 resource type = normal, cache type = gzipped, required encoding = normal
99 we would like to avoid normal -> gzipped -> normal conversions. To avoid
100 this tmp_short_circuit is used to remember the name of the intermediate
103 threadSafe#doWriter (lazy(
104 if not (is_in_cache basename) then begin (* cache MISS *)
105 debug_print "Cache MISS :-(";
106 mkdir ~parents:true (Filename.dirname downloadname);
107 match (resource_type, Http_getter_env.cache_mode) with
108 | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
109 wget ~output:downloadname url;
111 | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *)
112 let tmp = tempfile () in
113 wget ~output:tmp url;
114 gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
115 if enc = Enc_normal then (* user wants normal: don't delete it! *)
121 | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *)
122 let tmp = tempfile () in
123 wget ~output:tmp url;
124 gunzip ~output:basename ~keep:true tmp; (* fill cache *)
125 if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *)
132 debug_print "Cache HIT :-)";
136 let tmp_short_circuit = fill_cache () in
137 threadSafe#doReader (lazy(
138 assert (is_in_cache basename);
139 match (enc, Http_getter_env.cache_mode) with
140 | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
141 (* resource in cache is already in the required format *)
144 debug_print "No format mangling required (encoding = normal)";
145 return_file ~fname:basename ~contype ~patch_fun outchan
147 debug_print "No format mangling required (encoding = gzipped)";
149 ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
150 ~patch_fun ~gunzip:true
152 | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal ->
153 (match tmp_short_circuit with
154 | None -> (* no short circuit possible, use cache *)
155 debug_print "No short circuit available, use cache";
156 let tmp = tempfile () in
159 (* required format is normal, cached version is gzipped *)
160 gunzip (* gunzip to tmp *)
161 ~output:tmp ~keep:true (basename ^ ".gz");
162 return_file ~fname:tmp ~contype ~patch_fun outchan;
164 (* required format is gzipped, cached version is normal *)
165 gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
167 ~fname:tmp ~contype ~contenc:"x-gzip"
168 ~patch_fun ~gunzip:true
171 | Some (fname, Enc_normal) -> (* short circuit available, use it! *)
172 debug_print "Using short circuit (encoding = normal)";
173 return_file ~fname ~contype ~patch_fun outchan;
175 | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *)
176 debug_print "Using short circuit (encoding = gzipped)";
178 ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan;
183 (* TODO enc is not yet supported *)
184 let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan =
186 if patch then Http_getter_common.patch_xsl else (fun x -> x)
188 let fname = tempfile () in
189 wget ~output:fname url;
190 return_file ~fname ~contype:"text/xml" ~patch_fun outchan;
194 (* TODO enc is not yet supported *)
195 let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan =
197 if patch then Http_getter_common.patch_dtd else (fun x -> x)
199 if Sys.file_exists url then
200 (* TODO check this: old getter here used text/xml *)
201 return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan
203 return_html_error ("Can't find DTD: " ^ url) outchan
207 let module E = Http_getter_env in
209 (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*"))
210 ) [E.cic_dir ; E.nuprl_dir ; E.rdf_dir ]