1 (* Copyright (C) 2003-2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
28 open Http_getter_common
29 open Http_getter_const
31 open Http_getter_types
35 let configuration_file = BuildTimeOpts.conffile
37 let common_headers = [
38 "Cache-Control", "no-cache";
43 (* HTTP queries argument parsing *)
45 (* parse encoding ("format" parameter), default is `Normal *)
46 let parse_enc (req: Http_types.request) =
48 (match req#param "format" with
51 | s -> raise (Bad_request ("Invalid format: " ^ s)))
52 with Http_types.Param_not_found _ -> `Normal
54 (* parse "patch_dtd" parameter, default is true *)
55 let parse_patch (req: Http_types.request) =
57 (match req#param "patch_dtd" with
58 | s when String.lowercase s = "yes" -> true
59 | s when String.lowercase s = "no" -> false
60 | s -> raise (Bad_request ("Invalid patch_dtd value: " ^ s)))
61 with Http_types.Param_not_found _ -> true
63 (* parse output format ("format" parameter), no default value *)
64 let parse_output_format meth (req: Http_types.request) =
65 match req#param "format" with
66 | s when String.lowercase s = "txt" -> `Text
67 | s when String.lowercase s = "xml" -> `Xml
68 | s -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ s))
70 let xml_escape = Netencoding.Html.encode ~in_enc:`Enc_utf8 ()
72 let html_tag ?exn () =
73 let xml_decl = "<?xml version=\"1.0\"?>\n" in
76 let (exn, arg) = (xml_escape exn, xml_escape arg) in
78 ("%s<html xmlns=\"%s\"\nxmlns:helm=\"%s\"\n"
79 ^^ "helm:exception=\"%s\"\nhelm:exception_arg=\"%s\">\n")
80 xml_decl xhtml_ns helm_ns exn arg
82 sprintf "%s<html xmlns=\"%s\"\nxmlns:helm=\"%s\">\n"
83 xml_decl xhtml_ns helm_ns
85 let mk_return_fun pp_fun contype msg outchan =
87 ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan
89 let pp_msg s = sprintf "%s<body>%s</body></html>" (html_tag ()) s
92 let return_html_error exn =
95 ("%s\n<body>Http Getter error: <span style=\"color:red\">%s"
96 ^^ "</span></body></html>")
99 mk_return_fun pp_error "text/xml"
101 let return_html_internal_error exn =
102 let pp_internal_error s =
104 ("%s\n<body>Http Getter Internal error: <span style=\"color:red\">%s"
105 ^^ "</span></body></html>")
108 mk_return_fun pp_internal_error "text/xml"
110 let return_html_msg = mk_return_fun pp_msg "text/xml"
111 let return_html_raw = mk_return_fun null_pp "text/xml"
112 let return_xml_raw = mk_return_fun null_pp "text/xml"
113 let return_400 exn body = return_html_error exn body
115 let return_all_uris doctype uris outchan =
116 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
117 Http_daemon.send_header "Content-Type" "text/xml" outchan;
118 Http_daemon.send_headers common_headers outchan;
119 Http_daemon.send_CRLF outchan;
123 "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>
124 <!DOCTYPE %s SYSTEM \"%s/getdtd?uri=%s.dtd\">
129 (Lazy.force Http_getter_env.my_own_url)
133 (fun uri -> output_string outchan (sprintf "\t<uri value=\"%s\" />\n" uri))
135 output_string outchan (sprintf "</%s>\n" doctype)
137 let return_all_xml_uris fmt outchan =
138 let uris = Http_getter.getalluris () in
141 let buf = Buffer.create 10240 in
142 List.iter (bprintf buf "%s\n") uris ;
143 let body = Buffer.contents buf in
145 ~headers:(("Content-Type", "text/plain") :: common_headers)
147 | `Xml -> return_all_uris "alluris" uris outchan
149 let return_ls regexp fmt outchan =
150 let ls_items = Http_getter.ls regexp in
151 let buf = Buffer.create 10240 in
156 | Ls_section dir -> bprintf buf "dir, %s\n" dir
158 bprintf buf "object, %s, <%s,%s,%s,%s>\n"
159 obj.uri (if obj.ann then "YES" else "NO")
160 (string_of_ls_flag obj.types)
161 (string_of_ls_flag obj.body)
162 (string_of_ls_flag obj.proof_tree))
165 Buffer.add_string buf "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
166 bprintf buf "<!DOCTYPE ls SYSTEM \"%s/getdtd?uri=ls.dtd\">\n"
167 (Lazy.force Http_getter_env.my_own_url);
168 Buffer.add_string buf "<ls>\n";
171 | Ls_section dir -> bprintf buf "<section>%s</section>\n" dir
174 "<object name=\"%s\">
175 \t<ann value=\"%s\" />
176 \t<types value=\"%s\" />
177 \t<body value=\"%s\" />
178 \t<proof_tree value=\"%s\" />
181 obj.uri (if obj.ann then "YES" else "NO")
182 (string_of_ls_flag obj.types)
183 (string_of_ls_flag obj.body)
184 (string_of_ls_flag obj.proof_tree))
186 Buffer.add_string buf "</ls>\n");
187 let body = Buffer.contents buf in
189 ~headers:(("Content-Type", "text/plain") :: common_headers)
192 let return_help outchan = return_html_raw (Http_getter.help ()) outchan
194 let return_resolve writable uri outchan =
197 (sprintf "<url value=\"%s\" />\n" (Http_getter.resolve ~writable uri))
200 | Unresolvable_URI _ -> return_xml_raw "<unresolvable />\n" outchan
201 | Key_not_found _ -> return_xml_raw "<not_found />\n" outchan
203 let log_failure msg = Http_getter_logger.log ("Request not fulfilled: " ^ msg)
205 let convert_file ~from_enc ~to_enc fname =
206 let remove f = fun () -> if Sys.file_exists f then Sys.remove f in
207 match from_enc, to_enc with
209 | `Gzipped, `Gzipped -> fname, (fun () -> ())
210 | `Normal, `Gzipped ->
211 let tmp = Http_getter_misc.tempfile () in
212 Http_getter_misc.gzip ~keep:true ~output:tmp fname;
214 | `Gzipped, `Normal ->
215 let tmp = Http_getter_misc.tempfile () in
216 Http_getter_misc.gunzip ~keep:true ~output:tmp fname;
219 let is_gzip fname = Http_getter_misc.extension fname = ".gz"
221 let patch_fun_for uri url =
223 if Http_getter_common.is_theory_uri uri then
224 Some (Filename.dirname uri, Filename.dirname url)
228 Http_getter_common.patch_xml ?xmlbases ~via_http:true ()
230 let respond_dtd patch_dtd fname outchan =
231 let via_http = false in
233 if patch_dtd then Some (Http_getter_common.patch_dtd ~via_http ())
236 Http_getter_common.return_file ~via_http:true ~fname ~contype:"text/plain"
237 ~gunzip:false ?patch_fun ~enc:`Normal outchan
240 ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
243 if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
245 let fname = tempfile () in
246 finally (fun () -> Sys.remove fname) (lazy (
247 wget ~output:fname url;
248 return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan
251 Http_getter_cache.respond_xsl
252 ~url:(Http_getter.resolve (req#param "uri"))
253 ~patch:(parse_patch req) outchan *)
255 let respond_xslt patch_xslt xslt_name outchan =
256 let fname = Http_getter.getxslt xslt_name in
258 if patch_xslt then Some (Http_getter_common.patch_xsl ~via_http:true ())
261 Http_getter_common.return_file ~fname ~contype:"text/xml" ?patch_fun
262 ~gunzip:false ~via_http:true ~enc:`Normal outchan
266 let callback (req: Http_types.request) outchan =
268 Http_getter_logger.log ("Connection from " ^ req#clientAddr);
269 Http_getter_logger.log ("Received request: " ^ req#uri);
271 | "/help" -> return_help outchan
273 let uri = req#param "uri" in
274 let fname = Http_getter.getxml uri in (* local name, in cache *)
276 let remote_name = Http_getter.resolve ~writable:false uri in
277 let src_enc = if is_gzip fname then `Gzipped else `Normal in
278 let enc = parse_enc req in
279 let fname, cleanup = convert_file ~from_enc:src_enc ~to_enc:enc fname in
280 let contenc = if enc = `Gzipped then Some "x-gzip" else None in
283 then Some (patch_fun_for uri remote_name)
286 let gunzip = (enc = `Gzipped) in
288 Http_getter_common.return_file
289 ~fname ~contype:"text/xml" ?contenc ?patch_fun ~gunzip
290 ~via_http:true ~enc outchan;
291 with exn -> cleanup (); raise exn);
293 | "/getxslt" -> respond_xslt (parse_patch req) (req#param "uri") outchan
295 let fname = Http_getter.getdtd (req#param "uri") in
296 respond_dtd (parse_patch req) fname outchan
299 match req#param ~default:"false" "writable" with
303 return_resolve writable (req#param "uri") outchan
305 Http_getter.clean_cache ();
306 return_html_msg "Done." outchan
308 return_all_xml_uris (parse_output_format "getalluris" req) outchan
310 return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan
312 Http_daemon.respond ~body:Http_getter_const.empty_xml outchan
314 Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request))
316 Http_getter_logger.log "Done!\n"
318 | Http_types.Param_not_found attr_name ->
319 let msg = sprintf "Parameter '%s' is missing" attr_name in
321 return_400 ("bad_request", msg) msg outchan
324 return_html_error ("bad_request", msg) msg outchan
325 | Internal_error msg ->
327 return_html_internal_error ("internal_error", msg) msg outchan
329 let msg = "uncaught exception: " ^ (Printexc.to_string exn) in
331 | Http_getter_types.Key_not_found uri ->
332 return_html_error ("key_not_found", uri) msg outchan
335 return_html_error ("uncaught_exception", msg) msg outchan)
337 let batch_update = ref false
343 Arg.parse args (fun _-> ()) "http_getter honors the following options:\n";
344 Helm_registry.load_from configuration_file;
346 print_string (Http_getter_env.env_to_string ());
348 Sys.catch_break true;
349 let d_spec = Http_daemon.daemon_spec
350 ~mode:`Thread ~timeout:(Some 600)
351 ~port:(Lazy.force Http_getter_env.port)
353 ~callback:callback ()
356 Http_daemon.main d_spec