-(*
- * Copyright (C) 2003-2004:
- * Stefano Zacchiroli <zack@cs.unibo.it>
- * for the HELM Team http://helm.cs.unibo.it/
+(* Copyright (C) 2003-2005, HELM Team.
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
*)
open Printf
open Http_getter_common
+open Http_getter_const
open Http_getter_misc
open Http_getter_types
-open Http_getter_debugger
(* constants *)
-let configuration_file = "/projects/helm/etc/http_getter.conf.xml"
+let configuration_file = BuildTimeOpts.conffile
let common_headers = [
"Cache-Control", "no-cache";
(* HTTP queries argument parsing *)
- (* parse encoding ("format" parameter), default is Enc_normal *)
+ (* parse encoding ("format" parameter), default is `Normal *)
let parse_enc (req: Http_types.request) =
try
(match req#param "format" with
- | "normal" -> Enc_normal
- | "gz" -> Enc_gzipped
+ | "normal" -> `Normal
+ | "gz" -> `Gzipped
| s -> raise (Bad_request ("Invalid format: " ^ s)))
- with Http_types.Param_not_found _ -> Enc_normal
+ with Http_types.Param_not_found _ -> `Normal (* parameter absent: fall back to the default encoding *)
(* parse "patch_dtd" parameter, default is true *)
let parse_patch (req: Http_types.request) =
(* parse output format ("format" parameter), no default value *)
let parse_output_format meth (req: Http_types.request) =
match req#param "format" with
- | s when String.lowercase s = "txt" -> Fmt_text
- | s when String.lowercase s = "xml" -> Fmt_xml
+ | s when String.lowercase s = "txt" -> `Text (* the value is lowercased before comparison, so matching is case-insensitive *)
+ | s when String.lowercase s = "xml" -> `Xml
| s -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ s))
- (* parse "baseuri" format for /ls method, no default value *)
-let parse_ls_uri =
- let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in
- let trailing_slash_RE = Pcre.regexp "/+$" in
- let wrong_uri uri =
- raise (Bad_request ("Invalid /ls baseuri: " ^ uri))
+let xml_escape = Netencoding.Html.encode ~in_enc:`Enc_utf8 () (* string escaper obtained from Netencoding.Html.encode, with the input encoding declared as UTF-8 *)
+
+(* Build the XML declaration followed by the opening <html> tag, whose
+ * xmlns and xmlns:helm attributes are filled from xhtml_ns and helm_ns.
+ * When [exn] is given as a (name, argument) pair, both components are
+ * passed through xml_escape and emitted as the helm:exception and
+ * helm:exception_arg attributes of the tag. *)
+let html_tag ?exn () =
+  let xml_decl = "<?xml version=\"1.0\"?>\n" in
+  let exn_attrs =
+    match exn with
+    | None -> ""
+    | Some (name, arg) ->
+        let name = xml_escape name in
+        let arg = xml_escape arg in
+        sprintf "\nhelm:exception=\"%s\"\nhelm:exception_arg=\"%s\"" name arg
+  in
+  sprintf "%s<html xmlns=\"%s\"\nxmlns:helm=\"%s\"%s>\n"
+    xml_decl xhtml_ns helm_ns exn_attrs
+
+(* Generic responder: format [msg] with [pp_fun] and send the result over
+ * [outchan] through Http_daemon.respond, with [contype] as the value of the
+ * Content-Type header. *)
+let mk_return_fun pp_fun contype msg outchan =
+  let headers = [ ("Content-Type", contype) ] in
+  let body = pp_fun msg in
+  Http_daemon.respond ~body ~headers outchan
+
+(* Wrap [s] in a complete page: the opening produced by html_tag (without
+ * exception attributes), then [s] inside a body element. *)
+let pp_msg s =
+  let opening = html_tag () in
+  sprintf "%s<body>%s</body></html>" opening s
+(* Identity formatter, for bodies that must be sent unchanged. *)
+let null_pp = fun s -> s
+
+let return_html_error exn =
+ let pp_error s =
+ sprintf
+ ("%s\n<body>Http Getter error: <span style=\"color:red\">%s"
+ ^^ "</span></body></html>")
+ (html_tag ~exn ()) s (* exn is the pair that html_tag turns into the helm:exception attributes *)
in
- fun (req: Http_types.request) ->
- let baseuri = req#param "baseuri" in
- try
- let subs =
- Pcre.extract ~rex:parse_ls_RE
- (Pcre.replace ~rex:trailing_slash_RE baseuri)
- in
- (match (subs.(1), subs.(2)) with
- | "cic", uri -> Cic uri
- | "theory", uri -> Theory uri
- | _ -> wrong_uri baseuri)
- with Not_found -> wrong_uri baseuri
-
- (* parse "position" argument, default is 0 *)
-let parse_position (req: Http_types.request) =
- try
- let res = int_of_string (req#param "position") in
- if res < 0 then
- raise (Failure "int_of_string");
- res
- with
- | Http_types.Param_not_found _ -> 0
- | Failure "int_of_string" ->
- raise (Bad_request
- (sprintf "position must be a non negative integer (%s given)"
- (req#param "position")))
-
-let parse_rdf_class (req: Http_types.request) =
- match req#param "class" with
- | "forward" -> `Forward
- | "backward" -> `Backward
- | c -> raise (Bad_request ("Invalid RDF class: " ^ c))
-
-let return_all_foo_uris doctype uris outchan =
- Http_daemon.send_basic_headers ~code:200 outchan;
+ mk_return_fun pp_error "text/xml" (* partial application: the result still expects the message and the output channel *)
+
+(* Build a responder for internal errors.  [exn] is the (name, argument)
+ * pair forwarded to html_tag; the returned function takes the message and
+ * the output channel, renders the message inside a red span and sends it
+ * with content type text/xml through mk_return_fun. *)
+let return_html_internal_error exn =
+  let render msg =
+    let opening = html_tag ~exn () in
+    sprintf
+      ("%s\n<body>Http Getter Internal error: "
+       ^^ "<span style=\"color:red\">%s</span></body></html>")
+      opening msg
+  in
+  mk_return_fun render "text/xml"
+
+(* Ready-made responders: each takes the message, then the output channel,
+ * and answers with content type text/xml.  The msg variant formats with
+ * pp_msg; the two raw variants send the body as given. *)
+let return_html_msg msg outchan = mk_return_fun pp_msg "text/xml" msg outchan
+let return_html_raw body outchan = mk_return_fun null_pp "text/xml" body outchan
+let return_xml_raw body outchan = mk_return_fun null_pp "text/xml" body outchan
+(* Same as return_html_error; no explicit status code is passed along here. *)
+let return_400 exn = return_html_error exn
+
+let return_all_uris doctype uris outchan =
+ Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
Http_daemon.send_header "Content-Type" "text/xml" outchan;
Http_daemon.send_headers common_headers outchan;
Http_daemon.send_CRLF outchan;
output_string outchan (sprintf "</%s>\n" doctype)
let return_all_xml_uris fmt outchan =
- let uris = Http_getter.getalluris () in
+ let uris = Http_getter.getalluris () in
match fmt with
- | Fmt_text ->
+ | `Text ->
let buf = Buffer.create 10240 in
- List.iter (bprintf buf "%s\n") uris ;
- let body = Buffer.contents buf in
- Http_daemon.respond
- ~headers:(("Content-Type", "text/plain") :: common_headers)
- ~body outchan
- | Fmt_xml ->
- return_all_foo_uris "alluris" uris outchan
-
-let return_all_rdf_uris classs outchan =
- return_all_foo_uris "allrdfuris" (Http_getter.getallrdfuris classs) outchan
-
-let return_ls xmluri fmt outchan =
- let ls_items = Http_getter.ls xmluri in
+ List.iter (bprintf buf "%s\n") uris ; (* plain text listing: one uri per line *)
+ let body = Buffer.contents buf in
+ Http_daemon.respond
+ ~headers:(("Content-Type", "text/plain") :: common_headers)
+ ~body outchan
+ | `Xml -> return_all_uris "alluris" uris outchan (* the xml listing is delegated to return_all_uris *)
+
+let return_ls regexp fmt outchan =
+ let ls_items = Http_getter.ls regexp in
let buf = Buffer.create 10240 in
(match fmt with
- | Fmt_text ->
+ | `Text ->
List.iter
(function
| Ls_section dir -> bprintf buf "dir, %s\n" dir
(string_of_ls_flag obj.body)
(string_of_ls_flag obj.proof_tree))
ls_items
- | Fmt_xml ->
+ | `Xml ->
Buffer.add_string buf "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
bprintf buf "<!DOCTYPE ls SYSTEM \"%s/getdtd?uri=ls.dtd\">\n"
(Lazy.force Http_getter_env.my_own_url);
return_xml_raw
(sprintf "<url value=\"%s\" />\n" (Http_getter.resolve uri))
outchan
- with Unresolvable_URI uri ->
- return_xml_raw "<unresolved />\n" outchan
-
-let return_list_servers outchan =
- return_html_raw
- (sprintf "<html><body><table>\n%s\n</table></body></html>"
- (String.concat "\n"
- (List.map
- (fun (pos, server) ->
- sprintf "<tr><td>%d</td><td>%s</td></tr>" pos server)
- (Http_getter.list_servers ()))))
- outchan
+ with
+ | Unresolvable_URI _ -> return_xml_raw "<unresolvable />\n" outchan
+ | Key_not_found _ -> return_xml_raw "<not_found />\n" outchan
+
+let log_failure msg = Http_getter_logger.log ("Request not fulfilled: " ^ msg) (* logs msg behind a fixed prefix through Http_getter_logger.log *)
+
+(* Return the name of a file holding the content of [fname] in encoding
+ * [to_enc], given that [fname] itself is in encoding [from_enc], paired
+ * with a cleanup thunk meant to be invoked once the file is no longer
+ * needed.
+ * When the two encodings agree, [fname] is returned as is and the thunk
+ * does nothing; otherwise the converted copy is written to a temporary
+ * file, which the thunk removes if it still exists.
+ * If the conversion itself raises, the temporary file is removed here
+ * before the exception is re-raised: in that case the caller never
+ * receives the thunk and could not clean up on its own. *)
+let convert_file ~from_enc ~to_enc fname =
+  let via_tmp convert =
+    let tmp = Http_getter_misc.tempfile () in
+    let cleanup () = if Sys.file_exists tmp then Sys.remove tmp in
+    (try convert tmp with exn -> cleanup (); raise exn);
+    tmp, cleanup
+  in
+  match from_enc, to_enc with
+  | `Normal, `Normal
+  | `Gzipped, `Gzipped -> fname, (fun () -> ())
+  | `Normal, `Gzipped ->
+      via_tmp (fun tmp -> Http_getter_misc.gzip ~keep:true ~output:tmp fname)
+  | `Gzipped, `Normal ->
+      via_tmp (fun tmp -> Http_getter_misc.gunzip ~keep:true ~output:tmp fname)
+
+let is_gzip fname = Http_getter_misc.extension fname = ".gz" (* decided from the extension reported for the file name *)
+
+(* Build the XML patching function for [uri] and its [url].
+ * For theory uris (as decided by Http_getter_common.is_theory_uri) the
+ * directory parts of [uri] and [url] are passed as the xmlbases pair;
+ * for any other uri no xmlbases are given. *)
+let patch_fun_for uri url =
+  let xmlbases =
+    match Http_getter_common.is_theory_uri uri with
+    | true -> Some (Filename.dirname uri, Filename.dirname url)
+    | false -> None
+  in
+  Http_getter_common.patch_xml ?xmlbases ~via_http:true ()
+
+(* Hand the file [fname] to Http_getter_common.return_file with content
+ * type text/plain, encoding Normal and gunzip disabled.  When [patch_dtd]
+ * is true, Http_getter_common.patch_dtd is supplied as the optional patch
+ * function.
+ * NOTE(review): the patch function is built with via_http false while
+ * return_file is called with via_http true; confirm the mismatch is
+ * intended. *)
+let respond_dtd patch_dtd fname outchan =
+  let patch_fun =
+    match patch_dtd with
+    | true -> Some (Http_getter_common.patch_dtd ~via_http:false ())
+    | false -> None
+  in
+  Http_getter_common.return_file
+    ~via_http:true ~fname ~contype:"text/plain" ~gunzip:false ~enc:`Normal
+    ?patch_fun outchan
+
+(* let respond_xsl
+ ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
+ =
+ let patch_fun =
+ if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
+ in
+ let fname = tempfile () in
+ finally (fun () -> Sys.remove fname) (lazy (
+ wget ~output:fname url;
+ return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan
+ )) *)
+(* | "/getxslt" ->
+ Http_getter_cache.respond_xsl
+ ~url:(Http_getter.resolve (req#param "uri"))
+ ~patch:(parse_patch req) outchan *)
+
+(* Look up the stylesheet [xslt_name] with Http_getter.getxslt and hand the
+ * resulting file to Http_getter_common.return_file with content type
+ * text/xml, encoding Normal and gunzip disabled.  When [patch_xslt] is
+ * true, Http_getter_common.patch_xsl is supplied as the optional patch
+ * function. *)
+let respond_xslt patch_xslt xslt_name outchan =
+  let fname = Http_getter.getxslt xslt_name in
+  let patch_fun =
+    match patch_xslt with
+    | true -> Some (Http_getter_common.patch_xsl ~via_http:true ())
+    | false -> None
+  in
+  Http_getter_common.return_file
+    ~via_http:true ~enc:`Normal ~gunzip:false ~contype:"text/xml"
+    ~fname ?patch_fun outchan
(* thread action *)
let callback (req: Http_types.request) outchan =
try
- debug_print ("Connection from " ^ req#clientAddr);
- debug_print ("Received request: " ^ req#path);
+ Http_getter_logger.log ("Connection from " ^ req#clientAddr);
+ Http_getter_logger.log ("Received request: " ^ req#uri);
(match req#path with
| "/help" -> return_help outchan
| "/getxml" ->
let uri = req#param "uri" in
- Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri
- ~enc:(parse_enc req) ~patch:(parse_patch req) outchan
- | "/getxslt" ->
- Http_getter_cache.respond_xsl
- ~url:(Http_getter.resolve (req#param "uri"))
- ~patch:(parse_patch req) outchan
+ let fname = Http_getter.getxml uri in (* local name, in cache *)
+ let remote_name = Http_getter.resolve uri in (* remote name *)
+ let src_enc = if is_gzip fname then `Gzipped else `Normal in
+ let enc = parse_enc req in
+ let fname, cleanup = convert_file ~from_enc:src_enc ~to_enc:enc fname in (* cleanup removes the temporary copy, if convert_file made one *)
+ let contenc = if enc = `Gzipped then Some "x-gzip" else None in
+ let patch_fun =
+ if parse_patch req
+ then Some (patch_fun_for uri remote_name)
+ else None
+ in
+ let gunzip = (enc = `Gzipped) in
+ (try
+ Http_getter_common.return_file
+ ~fname ~contype:"text/xml" ?contenc ?patch_fun ~gunzip
+ ~via_http:true ~enc outchan;
+ with exn -> cleanup (); raise exn);
+ cleanup () (* normal path; the handler just above covers the failure path *)
+ | "/getxslt" -> respond_xslt (parse_patch req) (req#param "uri") outchan
| "/getdtd" ->
- Http_getter_cache.respond_dtd ~patch:(parse_patch req)
- ~url:(sprintf "%s/%s"
- (Helm_registry.get "getter.dtd_dir") (req#param "uri"))
- outchan
+ let fname = Http_getter.getdtd (req#param "uri") in
+ respond_dtd (parse_patch req) fname outchan
| "/resolve" -> return_resolve (req#param "uri") outchan
- | "/register" ->
- Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url");
- return_html_msg "Register done" outchan
| "/clean_cache" ->
Http_getter.clean_cache ();
return_html_msg "Done." outchan
- | "/update" ->
- Http_getter_env.reload (); (* reload servers list from servers file *)
- let log = Http_getter.update () in
- return_html_msg (HelmLogger.html_of_html_msg log) outchan
- | "/list_servers" -> return_list_servers outchan
- | "/add_server" ->
- let name = req#param "url" in
- let position = parse_position req in
- let log = Http_getter.add_server ~position name in
- return_html_msg
- (sprintf "Added server %s in position %d)<br />\n%s"
- name position (HelmLogger.html_of_html_msg log))
- outchan
- | "/remove_server" ->
- let position = parse_position req in
- let log =
- try
- Http_getter.remove_server position
- with Invalid_argument _ ->
- raise (Bad_request (sprintf "no server with position %d" position))
- in
- return_html_msg
- (sprintf "Removed server at position %d<br />\n%s"
- position (HelmLogger.html_of_html_msg log))
- outchan
| "/getalluris" ->
return_all_xml_uris (parse_output_format "getalluris" req) outchan
- | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan
| "/ls" ->
- return_ls (parse_ls_uri req) (parse_output_format "ls" req) outchan
+ return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan
| "/getempty" ->
Http_daemon.respond ~body:Http_getter_const.empty_xml outchan
| invalid_request ->
- Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan);
- debug_print "Done!\n"
+ Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request))
+ outchan);
+ Http_getter_logger.log "Done!\n"
with
| Http_types.Param_not_found attr_name ->
- return_400 (sprintf "Parameter '%s' is missing" attr_name) outchan
- | Bad_request msg -> return_html_error msg outchan
- | Internal_error msg -> return_html_internal_error msg outchan
- | Shell.Subprocess_error l ->
- return_html_internal_error
- (String.concat "<br />\n"
- (List.map
- (fun (cmd, code) ->
- sprintf "Command '%s' returned %s"
- cmd (string_of_proc_status code))
- l))
- outchan
- | exc ->
- return_html_error
- ("Uncaught exception: " ^ (Printexc.to_string exc))
- outchan
+ let msg = sprintf "Parameter '%s' is missing" attr_name in
+ log_failure msg;
+ return_400 ("bad_request", msg) msg outchan
+ | Bad_request msg ->
+ log_failure msg;
+ return_html_error ("bad_request", msg) msg outchan
+ | Internal_error msg ->
+ log_failure msg;
+ return_html_internal_error ("internal_error", msg) msg outchan
+ | exn ->
+ let msg = "uncaught exception: " ^ (Printexc.to_string exn) in
+ (match exn with
+ | Http_getter_types.Key_not_found uri ->
+ return_html_error ("key_not_found", uri) msg outchan (* NOTE(review): unlike the other handlers this branch does not call log_failure; confirm this is intended *)
+ | _ ->
+ log_failure msg;
+ return_html_error ("uncaught_exception", msg) msg outchan)
- (* Main *)
+let batch_update = ref false (* NOTE(review): not referenced anywhere in the visible code; confirm it is still needed *)
+let args = [ ] (* no command line options are defined; this empty list is what main passes to Arg.parse *)
+
+ (* Main *)
let main () =
+ Arg.parse args (fun _-> ()) "http_getter honors the following options:\n"; (* anonymous arguments are silently ignored *)
Helm_registry.load_from configuration_file;
- Http_getter_env.reload ();
+ Http_getter.init ();
print_string (Http_getter_env.env_to_string ());
flush stdout;
- at_exit Http_getter.close_maps;
Sys.catch_break true;
+ let d_spec = Http_daemon.daemon_spec
+ ~mode:`Thread ~timeout:(Some 600)
+ ~port:(Lazy.force Http_getter_env.port)
+ ~callback:callback ()
+ in
try
- Http_daemon.start' ~mode:`Thread
- ~timeout:(Some 600) ~port:(Helm_registry.get_int "getter.port")
- callback
- with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *)
+ Http_daemon.main d_spec
+ with Sys.Break -> () (* Sys.catch_break is enabled above, so an interrupt ends main quietly *)
let _ = main ()