2 * Copyright (C) 2003-2004:
3 * Stefano Zacchiroli <zack@cs.unibo.it>
4 * for the HELM Team http://helm.cs.unibo.it/
6 * This file is part of HELM, an Hypertextual, Electronic
7 * Library of Mathematics, developed at the Computer Science
8 * Department, University of Bologna, Italy.
10 * HELM is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * HELM is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with HELM; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * For details, see the HELM World-Wide-Web page,
26 * http://helm.cs.unibo.it/
31 open Http_getter_common
32 open Http_getter_const
34 open Http_getter_types
(* Configuration file handed to Helm_registry.load_from during start-up;
 * the value is taken from BuildTimeOpts.conffile. *)
38 let configuration_file = BuildTimeOpts.conffile
(* HTTP headers (name, value pairs) shared by several replies in this file:
 * they are sent by return_all_foo_uris and appended to the headers of the
 * text/plain replies.
 * NOTE(review): the list is not closed in this listing -- the embedded
 * numbering jumps from 41 to 46, so any further entries and the closing
 * bracket are missing here; restore them from the original file. *)
40 let common_headers = [
41 "Cache-Control", "no-cache";
46 (* HTTP queries argument parsing *)
48 (* parse encoding ("format" parameter), default is `Normal.
 * A missing parameter (Http_types.Param_not_found) yields `Normal; a value
 * that matches none of the accepted cases raises Bad_request with the
 * message "Invalid format: " followed by the value.
 * NOTE(review): the embedded numbering skips 50, 52 and 53 in this listing
 * -- presumably the opening `try` and the arms for the accepted values;
 * confirm against the original file before editing. *)
49 let parse_enc (req: Http_types.request) =
51 (match req#param "format" with
54 | s -> raise (Bad_request ("Invalid format: " ^ s)))
55 with Http_types.Param_not_found _ -> `Normal
57 (* parse "patch_dtd" parameter, default is true.
 * The value is compared after lowercasing: "yes" gives true, "no" gives
 * false, anything else raises Bad_request with the offending value. A
 * missing parameter (Http_types.Param_not_found) gives true.
 * NOTE(review): the embedded numbering skips 59 in this listing --
 * presumably the `try` that pairs with the final `with`; confirm against
 * the original file. *)
58 let parse_patch (req: Http_types.request) =
60 (match req#param "patch_dtd" with
61 | s when String.lowercase s = "yes" -> true
62 | s when String.lowercase s = "no" -> false
63 | s -> raise (Bad_request ("Invalid patch_dtd value: " ^ s)))
64 with Http_types.Param_not_found _ -> true
(* Read the "format" parameter and map it, ignoring case, to an output
 * format: "txt" gives `Text and "xml" gives `Xml. There is no default value:
 * no handler for Http_types.Param_not_found is installed here, and any other
 * value raises Bad_request naming the /meth action together with the value
 * exactly as it was received. *)
let parse_output_format meth (req: Http_types.request) =
  let requested = req#param "format" in
  match String.lowercase requested with
  | "txt" -> `Text
  | "xml" -> `Xml
  | _ -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ requested))
73 (* parse "position" argument, default is 0.
 * The parameter is converted with int_of_string. A missing parameter
 * (Http_types.Param_not_found) gives 0. A Failure "int_of_string" -- raised
 * by the conversion or by the explicit raise below -- is reported with the
 * message "position must be a non negative integer (<value> given)".
 * NOTE(review): the handler matches the literal Failure "int_of_string", so
 * it depends on the exact message used by the standard library.
 * NOTE(review): the embedded numbering skips 75, 77, 79, 80, 83 and 86 in
 * this listing -- presumably the `try`, the negativity test guarding the
 * explicit raise, the success result, the `with` and the exception raised
 * around the sprintf; confirm against the original file. *)
74 let parse_position (req: Http_types.request) =
76 let res = int_of_string (req#param "position") in
78 raise (Failure "int_of_string");
81 | Http_types.Param_not_found _ -> 0
82 | Failure "int_of_string" ->
84 (sprintf "position must be a non negative integer (%s given)"
85 (req#param "position")))
(* Read the "class" parameter and turn it into an RDF class tag: "forward"
 * gives `Forward and "backward" gives `Backward (exact, case-sensitive
 * comparison). Any other value raises Bad_request with that value. *)
let parse_rdf_class (req: Http_types.request) =
  let cls = req#param "class" in
  if cls = "forward" then `Forward
  else if cls = "backward" then `Backward
  else raise (Bad_request ("Invalid RDF class: " ^ cls))
(* Build the opening of an XHTML document: an XML declaration followed by an
 * <html> start tag carrying the xhtml_ns and helm_ns namespaces. Two
 * templates are visible: one that also emits a helm:exception attribute
 * filled with exn, and one without that attribute.
 * NOTE(review): the embedded numbering skips 95-97 and 100 in this listing
 * -- presumably the match on the optional ?exn argument that selects between
 * the two templates, plus the first sprintf; confirm against the original
 * file. *)
93 let html_tag ?exn () =
94 let xml_decl = "<?xml version=\"1.0\"?>\n" in
98 "%s<html xmlns=\"%s\"\nxmlns:helm=\"%s\"\nhelm:exception=\"%s\">\n"
99 xml_decl xhtml_ns helm_ns exn
101 sprintf "%s<html xmlns=\"%s\"\nxmlns:helm=\"%s\">\n"
102 xml_decl xhtml_ns helm_ns
(* Generic reply builder: the body is pp_fun applied to msg, the only header
 * is Content-Type set to contype, and the reply goes to outchan. Partially
 * applied to pp_fun and contype it yields the return_* helpers below.
 * NOTE(review): the embedded numbering skips 105 in this listing --
 * presumably the call that receives these labelled arguments; confirm
 * against the original file. *)
104 let mk_return_fun pp_fun contype msg outchan =
106 ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan
(* Wrap the message s in a complete document: the prologue produced by
 * html_tag (called without an exception name), then s inside a <body>
 * element, then the closing </html> tag. *)
let pp_msg s =
  let prologue = html_tag () in
  sprintf "%s<body>%s</body></html>" prologue s
(* Build a reply function for an error page: the result is mk_return_fun
 * applied to a local pretty printer pp_error and the content type text/xml,
 * so it still expects the message and the output channel. The visible
 * template renders the text after "Http Getter error:" in a red span.
 * NOTE(review): the embedded numbering skips 110, 112 and 113 in this
 * listing -- presumably the header of pp_error and the sprintf arguments
 * (which is where exn would be used); confirm against the original file. *)
109 let return_html_error exn =
111 sprintf "%s\n<body>Http Getter error: <span style=\"color:red\">%s</span></body></html>"
114 mk_return_fun pp_error "text/xml"
(* Same shape as return_html_error, for internal errors: the result is
 * mk_return_fun applied to the local printer pp_internal_error and the
 * content type text/xml, so it still expects the message and the output
 * channel. The template renders the text after "Http Getter Internal error:"
 * in a red span.
 * NOTE(review): the embedded numbering skips 118 and 119 in this listing --
 * presumably the sprintf arguments (which is where exn would be used);
 * confirm against the original file. *)
115 let return_html_internal_error exn =
116 let pp_internal_error s =
117 sprintf "%s\n<body>Http Getter Internal error: <span style=\"color:red\">%s</span></body></html>"
120 mk_return_fun pp_internal_error "text/xml"
(* Ready-made reply helpers, all built on mk_return_fun with the content
 * type text/xml. Each takes the message and the output channel. *)

(* Send msg wrapped in a full document by pp_msg. *)
let return_html_msg msg outchan =
  mk_return_fun pp_msg "text/xml" msg outchan

(* Send msg after passing it through null_pp. *)
let return_html_raw msg outchan =
  mk_return_fun null_pp "text/xml" msg outchan

(* Same as return_html_raw: msg goes through null_pp. *)
let return_xml_raw msg outchan =
  mk_return_fun null_pp "text/xml" msg outchan

(* Reply used for bad requests: delegates to return_html_error with the
 * exception name exn and the message body. *)
let return_400 exn body outchan =
  return_html_error exn body outchan
(* Stream an XML list of uris on outchan. The visible steps are: send the
 * status line with code 200, a Content-Type header set to text/xml, the
 * common_headers and the blank line that ends the headers; write an XML
 * prologue whose DOCTYPE points at the getdtd action of this server
 * (Http_getter_env.my_own_url); write one <uri value=... /> element per uri;
 * finally write the closing tag named after doctype.
 * NOTE(review): the embedded numbering skips 131-133, 136-139, 141-143 and
 * 145 in this listing, and the multi-line string literal opened below is not
 * closed in view -- presumably the output call, the rest of the prologue and
 * the iteration over uris; confirm against the original file. *)
126 let return_all_foo_uris doctype uris outchan =
127 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
128 Http_daemon.send_header "Content-Type" "text/xml" outchan;
129 Http_daemon.send_headers common_headers outchan;
130 Http_daemon.send_CRLF outchan;
134 "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>
135 <!DOCTYPE %s SYSTEM \"%s/getdtd?uri=%s.dtd\">
140 (Lazy.force Http_getter_env.my_own_url)
144 (fun uri -> output_string outchan (sprintf "\t<uri value=\"%s\" />\n" uri))
146 output_string outchan (sprintf "</%s>\n" doctype)
(* Reply with every uri returned by Http_getter.getalluris. Two paths are
 * visible: one accumulates the uris, one per line, in a buffer and sends the
 * result with a text/plain Content-Type plus common_headers; the other
 * delegates to return_all_foo_uris with the doctype "alluris".
 * NOTE(review): the embedded numbering skips 150, 151, 155, 157 and 158 in
 * this listing -- presumably the match on fmt that selects between the two
 * paths and the respond call; confirm against the original file. *)
148 let return_all_xml_uris fmt outchan =
149 let uris = Http_getter.getalluris () in
152 let buf = Buffer.create 10240 in
153 List.iter (bprintf buf "%s\n") uris ;
154 let body = Buffer.contents buf in
156 ~headers:(("Content-Type", "text/plain") :: common_headers)
159 return_all_foo_uris "alluris" uris outchan
(* Reply with the RDF uris of the given class: fetch them with
 * Http_getter.getallrdfuris and stream them through return_all_foo_uris
 * under the doctype "allrdfuris". *)
let return_all_rdf_uris classs outchan =
  let rdf_uris = Http_getter.getallrdfuris classs in
  return_all_foo_uris "allrdfuris" rdf_uris outchan
(* Reply with the listing returned by Http_getter.ls for regexp, rendered
 * into a buffer in one of two layouts. The plain layout writes one line per
 * item: "dir, <name>" for a section and "object, <uri>, <ann,types,body,
 * proof_tree>" for an object. The XML layout writes a prologue whose DOCTYPE
 * points at the getdtd action of this server, an <ls> root, a <section>
 * element per section and an <object> element with ann, types, body and
 * proof_tree children per object. The ann flag is printed as YES or NO; the
 * other three go through string_of_ls_flag.
 * NOTE(review): the Content-Type header on the last line is text/plain and
 * is built after both layouts have been rendered, so the XML layout appears
 * to be served as text/plain as well -- confirm whether that is intended.
 * NOTE(review): the embedded numbering skips 167-170, 172, 178, 179, 184,
 * 185, 187, 188, 194, 195, 200, 203 and 205 in this listing -- presumably
 * the match on fmt, the iteration over ls_items, the object arms and the
 * respond call; confirm against the original file. *)
164 let return_ls regexp fmt outchan =
165 let ls_items = Http_getter.ls regexp in
166 let buf = Buffer.create 10240 in
171 | Ls_section dir -> bprintf buf "dir, %s\n" dir
173 bprintf buf "object, %s, <%s,%s,%s,%s>\n"
174 obj.uri (if obj.ann then "YES" else "NO")
175 (string_of_ls_flag obj.types)
176 (string_of_ls_flag obj.body)
177 (string_of_ls_flag obj.proof_tree))
180 Buffer.add_string buf "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
181 bprintf buf "<!DOCTYPE ls SYSTEM \"%s/getdtd?uri=ls.dtd\">\n"
182 (Lazy.force Http_getter_env.my_own_url);
183 Buffer.add_string buf "<ls>\n";
186 | Ls_section dir -> bprintf buf "<section>%s</section>\n" dir
189 "<object name=\"%s\">
190 \t<ann value=\"%s\" />
191 \t<types value=\"%s\" />
192 \t<body value=\"%s\" />
193 \t<proof_tree value=\"%s\" />
196 obj.uri (if obj.ann then "YES" else "NO")
197 (string_of_ls_flag obj.types)
198 (string_of_ls_flag obj.body)
199 (string_of_ls_flag obj.proof_tree))
201 Buffer.add_string buf "</ls>\n");
202 let body = Buffer.contents buf in
204 ~headers:(("Content-Type", "text/plain") :: common_headers)
(* Reply with the help text produced by Http_getter.help, sent as is through
 * return_html_raw. *)
let return_help outchan =
  let help_text = Http_getter.help () in
  return_html_raw help_text outchan
(* Reply with the result of resolving uri through Http_getter.resolve. On
 * success the reply is a <url value=... /> element carrying the resolved
 * value; Unresolvable_URI gives an <unresolvable /> element and
 * Key_not_found gives a <not_found /> element, both sent with
 * return_xml_raw.
 * NOTE(review): the embedded numbering skips 210, 211, 213 and 214 in this
 * listing -- presumably the `try`, the call that sends the success reply and
 * the `with`; confirm against the original file. *)
209 let return_resolve uri outchan =
212 (sprintf "<url value=\"%s\" />\n" (Http_getter.resolve uri))
215 | Unresolvable_URI _ -> return_xml_raw "<unresolvable />\n" outchan
216 | Key_not_found _ -> return_xml_raw "<not_found />\n" outchan
(* Reply with the servers returned by Http_getter.list_servers, rendered as a
 * document whose body is a table with one row per (position, server) pair.
 * NOTE(review): the embedded numbering skips 219, 221-223, 227 and 228 in
 * this listing -- presumably the reply call, the document prologue, the
 * joining of the rows and the output channel argument; confirm against the
 * original file. *)
218 let return_list_servers outchan =
220 (sprintf "%s<body><table>\n%s\n</table></body></html>"
224 (fun (pos, server) ->
225 sprintf "<tr><td>%d</td><td>%s</td></tr>" pos server)
226 (Http_getter.list_servers ()))))
(* Log a request failure: msg is prefixed with "Request not fulfilled: " and
 * handed to Http_getter_logger.log. *)
let log_failure msg =
  let entry = "Request not fulfilled: " ^ msg in
  Http_getter_logger.log entry
231 (** Given an action (i.e. a function which expects a logger and does
 * something using it as a logger), perform it, sending its output
 * incrementally to the given output channel. The response is sent embedded
 * in an HTML document: the status line with code 200, a text/xml
 * Content-Type header and the opening markup are written first, then the
 * optional [prepend] text (followed by a flush), and finally the closing
 * body and html tags.
 * The channel is closed afterwards.
 * NOTE(review): the embedded numbering skips 240-242, 244, 246-248 and 250
 * onwards in this listing, so the match on [prepend], the logger that writes
 * HelmLogger.html_of_html_tag output, the call to [action] and the channel
 * close are not visible here; confirm against the original file. *)
235 let send_log_to ?prepend action outchan =
236 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
237 Http_daemon.send_header "Content-Type" "text/xml" outchan;
238 Http_daemon.send_CRLF outchan;
239 output_string outchan (sprintf "%s<body>\n" (html_tag ()));
243 | Some text -> output_string outchan text; flush outchan);
245 output_string outchan (HelmLogger.html_of_html_tag tag);
249 output_string outchan "\n</body></html>";
(* Request handler of the daemon. It logs the client address and the request
 * path, dispatches on a string (the visible arms are "/help", "/resolve",
 * "/list_servers", "/remove_server" and "/getallrdfuris"), replies on
 * outchan and logs "Done!". Failures are turned into replies by the handlers
 * at the end: a missing parameter gives a 400-style error page, Internal_error
 * and Shell.Subprocess_error give internal error pages, and any other
 * exception is reported as "Uncaught exception: ...".
 * NOTE(review): many lines are absent from this listing (the embedded
 * numbering skips 255, 258, 260, 264, 268, 272, 274, 277, 280, 283, 287,
 * 290, 292, 293, 299, 300, 302, 303, 305, 308, 310, 312, 314, 316, 319,
 * 321, 322, 325, 328-330, 332, 333, 337, 339 and 341 onwards). The `try`,
 * the `match` header, the patterns of most arms and parts of the exception
 * handlers are therefore not visible; confirm against the original file
 * before changing any logic here. *)
254 let callback (req: Http_types.request) outchan =
256 Http_getter_logger.log ("Connection from " ^ req#clientAddr);
257 Http_getter_logger.log ("Received request: " ^ req#path);
259 | "/help" -> return_help outchan
(* XML document: resolve the uri and answer through the cache, honouring the
 * encoding and patch_dtd parameters. *)
261 let uri = req#param "uri" in
262 Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri
263 ~enc:(parse_enc req) ~patch:(parse_patch req) outchan
(* XSL stylesheet: same idea, without an encoding parameter. *)
265 Http_getter_cache.respond_xsl
266 ~url:(Http_getter.resolve (req#param "uri"))
267 ~patch:(parse_patch req) outchan
(* DTD: the url is built from the getter.dtd_dir registry key and the uri
 * parameter rather than resolved. *)
269 Http_getter_cache.respond_dtd ~patch:(parse_patch req)
270 ~url:(sprintf "%s/%s"
271 (Helm_registry.get "getter.dtd_dir") (req#param "uri"))
273 | "/resolve" -> return_resolve (req#param "uri") outchan
275 Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url");
276 return_html_msg "Register done" outchan
278 Http_getter.unregister (req#param "uri");
279 return_html_msg "Unregister done" outchan
281 Http_getter.clean_cache ();
282 return_html_msg "Done." outchan
284 Http_getter_env.reload (); (* reload servers list from servers file *)
285 send_log_to (fun logger -> Http_getter.update ~logger ()) outchan
286 | "/list_servers" -> return_list_servers outchan
(* Add a server: the name comes from the url parameter, the position from
 * parse_position (0 when absent). *)
288 let name = req#param "url" in
289 let position = parse_position req in
(* NOTE(review): the message below contains a closing parenthesis with no
 * matching opening one ("position %d)") -- looks like a typo in the
 * user-visible text; confirm and fix in a code change. *)
291 sprintf "Added server %s in position %d)<br />\n" name position
294 (fun logger -> Http_getter.add_server ~logger ~position name) outchan
295 | "/remove_server" ->
296 let position = parse_position req in
(* Reject positions that do not designate a known server before removing. *)
297 if not (Http_getter.has_server position) then
298 raise (Bad_request (sprintf "no server with position %d" position))
301 sprintf "Removed server at position %d<br />\n" position
304 (fun logger -> Http_getter.remove_server ~logger position) outchan
306 return_all_xml_uris (parse_output_format "getalluris" req) outchan
307 | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan
309 return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan
311 Http_daemon.respond ~body:Http_getter_const.empty_xml outchan
(* Presumably the fallback arm for unknown paths: answer with a Bad_request
 * client error -- TODO confirm, the pattern line is not visible. *)
313 Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request))
315 Http_getter_logger.log "Done!\n"
(* Exception handlers: each one produces an error reply on outchan. *)
317 | Http_types.Param_not_found attr_name ->
318 let msg = sprintf "Parameter '%s' is missing" attr_name in
320 return_400 "Bad_request" msg outchan
323 return_html_error "Bad_request" msg outchan
324 | Internal_error msg ->
326 return_html_internal_error "Internal_error" msg outchan
327 | Shell.Subprocess_error l ->
331 sprintf "Command '%s' returned %s" cmd (string_of_proc_status code))
(* The per-command messages are joined with ", " for the log and with an
 * HTML line break for the reply. *)
334 log_failure (String.concat ", " msgs);
335 return_html_internal_error "Subprocess_error"
336 (String.concat "<br />\n" msgs) outchan
338 let msg = "Uncaught exception: " ^ (Printexc.to_string exc) in
340 return_html_error "Uncaught_exception" msg outchan
(* Program start-up. The registry is loaded from configuration_file, the log
 * level (getter.log_level, default 1) and the optional log file
 * (getter.log_file) are applied, the environment is reloaded and printed.
 * If the first command line argument is "-update" the program runs a single
 * update that logs to stdout; otherwise it registers Http_getter.close_maps
 * with at_exit, enables Sys.Break on interrupt and starts the HTTP daemon in
 * thread mode on the port given by getter.port with a timeout value of 600.
 * NOTE(review): the embedded numbering skips 341-344, 352, 353, 355, 361,
 * 364 and anything after 365 in this listing -- presumably the enclosing
 * definition header, the batch_update binding, the `try` around the daemon
 * start, the callback argument and the closing `end`; confirm against the
 * original file. *)
345 Helm_registry.load_from configuration_file;
346 Http_getter_logger.set_log_level
347 (Helm_registry.get_opt_default Helm_registry.get_int 1 "getter.log_level");
348 Http_getter_logger.set_log_file
349 (Helm_registry.get_opt Helm_registry.get_string "getter.log_file");
350 Http_getter_env.reload ();
351 print_string (Http_getter_env.env_to_string ());
(* Sys.argv.(1) raises Invalid_argument when no argument was given; that case
 * counts as "not batch mode". *)
354 try Sys.argv.(1) = "-update" with Invalid_argument _ -> false
356 if batch_update then (* batch mode: performs update and exit *)
357 Http_getter.update ~logger:Http_getter.stdout_logger ()
358 else begin (* daemon mode: start http daemon *)
359 at_exit Http_getter.close_maps;
360 Sys.catch_break true;
362 Http_daemon.start' ~mode:`Thread
363 ~timeout:(Some 600) ~port:(Helm_registry.get_int "getter.port")
365 with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *)