X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fmain.ml;h=3117a85c991eed1be6c3cf81d14d2048cfe45683;hb=4167cea65ca58897d1a3dbb81ff95de5074700cc;hp=974d029e5e28ca7aa22331d7045294f15c0e3c38;hpb=a3b2a5b4a985644c7bd7271b40b599e154d347ef;p=helm.git diff --git a/helm/http_getter/main.ml b/helm/http_getter/main.ml index 974d029e5..3117a85c9 100644 --- a/helm/http_getter/main.ml +++ b/helm/http_getter/main.ml @@ -1,42 +1,38 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ +(* Copyright (C) 2003-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ *) open Printf open Http_getter_common +open Http_getter_const open Http_getter_misc open Http_getter_types -open Http_getter_debugger (* constants *) -(* let configuration_file = "/projects/helm/etc/http_getter.conf.xml" *) -let configuration_file = "http_getter.conf.xml" +let configuration_file = BuildTimeOpts.conffile let common_headers = [ "Cache-Control", "no-cache"; @@ -71,64 +67,53 @@ let parse_output_format meth (req: Http_types.request) = | s when String.lowercase s = "xml" -> `Xml | s -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ s)) - (* parse "baseuri" format for /ls method, no default value *) -let parse_ls_uri = - let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in - let trailing_slash_RE = Pcre.regexp "/+$" in - let wrong_uri uri = - raise (Bad_request ("Invalid /ls baseuri: " ^ uri)) - in - fun (req: Http_types.request) -> - let baseuri = req#param "baseuri" in - try - let subs = - Pcre.extract ~rex:parse_ls_RE - (Pcre.replace ~rex:trailing_slash_RE baseuri) - in - (match (subs.(1), subs.(2)) with - | "cic", uri -> Cic uri - | "theory", uri -> Theory uri - | _ -> wrong_uri baseuri) - with Not_found -> wrong_uri baseuri - - (* parse "position" argument, default is 0 *) -let parse_position (req: Http_types.request) = - try - let res = int_of_string (req#param "position") in - if res < 0 then - raise (Failure "int_of_string"); - res - with - | Http_types.Param_not_found _ -> 0 - | Failure "int_of_string" -> - raise (Bad_request - (sprintf "position must be a non negative integer (%s given)" - (req#param "position"))) - -let parse_rdf_class (req: Http_types.request) = - match req#param "class" with - | "forward" -> `Forward - | "backward" -> `Backward - | c -> raise (Bad_request ("Invalid RDF class: " ^ c)) +let xml_escape = Netencoding.Html.encode ~in_enc:`Enc_utf8 () + +let html_tag ?exn () = + let xml_decl = "\n" in + match exn with + | Some (exn, arg) -> + let (exn, arg) = (xml_escape exn, xml_escape arg) in + sprintf + ("%s\n") + xml_decl xhtml_ns helm_ns exn arg + | None -> + sprintf "%s\n" + xml_decl xhtml_ns helm_ns let mk_return_fun pp_fun contype msg outchan = Http_daemon.respond ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan -let pp_error s = - sprintf "Http Getter error: %s" s -let pp_internal_error s = - sprintf "Http Getter Internal error: %s" s -let pp_msg s = sprintf "%s" s + +let pp_msg s = sprintf "%s%s" (html_tag ()) s let null_pp s = s -let return_html_error = mk_return_fun pp_error "text/html" -let return_html_internal_error = mk_return_fun pp_internal_error "text/html" -let return_html_msg = mk_return_fun pp_msg "text/html" -let return_html_raw = mk_return_fun null_pp "text/html" + +let return_html_error exn = + let pp_error s = + sprintf + ("%s\nHttp Getter error: %s" + ^^ "") + (html_tag ~exn ()) s + in + mk_return_fun pp_error "text/xml" + +let return_html_internal_error exn = + let pp_internal_error s = + sprintf + ("%s\nHttp Getter Internal error: %s" + ^^ "") + (html_tag ~exn ()) s + in + mk_return_fun pp_internal_error "text/xml" + +let return_html_msg = mk_return_fun pp_msg "text/xml" +let return_html_raw = mk_return_fun null_pp "text/xml" let return_xml_raw = mk_return_fun null_pp "text/xml" -let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan +let return_400 exn body = return_html_error exn body -let return_all_foo_uris doctype uris outchan = - Http_daemon.send_basic_headers ~code:200 outchan; +let return_all_uris doctype uris outchan = + Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_headers common_headers outchan; Http_daemon.send_CRLF outchan; @@ -150,23 +135,19 @@ let return_all_foo_uris doctype uris outchan = output_string outchan (sprintf "\n" doctype) let return_all_xml_uris fmt outchan = - let uris = Http_getter.getalluris () in + let uris = Http_getter.getalluris () in match fmt with - | `Text -> + | `Text -> let buf = Buffer.create 10240 in - List.iter (bprintf buf "%s\n") uris ; - let body = Buffer.contents buf in - Http_daemon.respond - ~headers:(("Content-Type", "text/plain") :: common_headers) - ~body outchan - | `Xml -> - return_all_foo_uris "alluris" uris outchan - -let return_all_rdf_uris classs outchan = - return_all_foo_uris "allrdfuris" (Http_getter.getallrdfuris classs) outchan - -let return_ls xmluri fmt outchan = - let ls_items = Http_getter.ls xmluri in + List.iter (bprintf buf "%s\n") uris ; + let body = Buffer.contents buf in + Http_daemon.respond + ~headers:(("Content-Type", "text/plain") :: common_headers) + ~body outchan + | `Xml -> return_all_uris "alluris" uris outchan + +let return_ls regexp fmt outchan = + let ls_items = Http_getter.ls regexp in let buf = Buffer.create 10240 in (match fmt with | `Text -> @@ -215,140 +196,157 @@ let return_resolve uri outchan = return_xml_raw (sprintf "\n" (Http_getter.resolve uri)) outchan - with Unresolvable_URI uri -> - return_xml_raw "\n" outchan - -let return_list_servers outchan = - return_html_raw - (sprintf "\n%s\n
" - (String.concat "\n" - (List.map - (fun (pos, server) -> - sprintf "%d%s" pos server) - (Http_getter.list_servers ())))) - outchan + with + | Unresolvable_URI _ -> return_xml_raw "\n" outchan + | Key_not_found _ -> return_xml_raw "\n" outchan -let log_failure msg = debug_print ("Request not fulfilled: " ^ msg) +let log_failure msg = Http_getter_logger.log ("Request not fulfilled: " ^ msg) - (** given an action (i.e. a function which expects a logger and do something - * using it as a logger), perform it sending its output incrementally to the - * given output channel. Response is sent embedded in an HTML document. - * Channel is closed afterwards. *) -let send_log_to ?prepend action outchan = - Http_daemon.send_basic_headers ~code:200 outchan; - Http_daemon.send_header "Content-Type" "text/html" outchan; - Http_daemon.send_CRLF outchan; - output_string outchan "\n"; flush outchan; - (match prepend with - | None -> () - | Some text -> output_string outchan text; flush outchan); - let logger tag = - output_string outchan (HelmLogger.html_of_html_tag tag); - flush outchan +let convert_file ~from_enc ~to_enc fname = + let remove f = fun () -> if Sys.file_exists f then Sys.remove f in + match from_enc, to_enc with + | `Normal, `Normal + | `Gzipped, `Gzipped -> fname, (fun () -> ()) + | `Normal, `Gzipped -> + let tmp = Http_getter_misc.tempfile () in + Http_getter_misc.gzip ~keep:true ~output:tmp fname; + tmp, remove tmp + | `Gzipped, `Normal -> + let tmp = Http_getter_misc.tempfile () in + Http_getter_misc.gunzip ~keep:true ~output:tmp fname; + tmp, remove tmp + +let is_gzip fname = Http_getter_misc.extension fname = ".gz" + +let patch_fun_for uri url = + let xmlbases = + if Http_getter_common.is_theory_uri uri then + Some (Filename.dirname uri, Filename.dirname url) + else + None + in + Http_getter_common.patch_xml ?xmlbases ~via_http:true () + +let respond_dtd patch_dtd fname outchan = + let via_http = false in + let patch_fun = + if patch_dtd then Some (Http_getter_common.patch_dtd ~via_http ()) + else None + in + Http_getter_common.return_file ~via_http:true ~fname ~contype:"text/plain" + ~gunzip:false ?patch_fun ~enc:`Normal outchan + +(* let respond_xsl + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = + let patch_fun = + if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) in - action logger; - output_string outchan "\n"; - close_out outchan + let fname = tempfile () in + finally (fun () -> Sys.remove fname) (lazy ( + wget ~output:fname url; + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan + )) *) +(* | "/getxslt" -> + Http_getter_cache.respond_xsl + ~url:(Http_getter.resolve (req#param "uri")) + ~patch:(parse_patch req) outchan *) + +let respond_xslt patch_xslt xslt_name outchan = + let fname = Http_getter.getxslt xslt_name in + let patch_fun = + if patch_xslt then Some (Http_getter_common.patch_xsl ~via_http:true ()) + else None + in + Http_getter_common.return_file ~fname ~contype:"text/xml" ?patch_fun + ~gunzip:false ~via_http:true ~enc:`Normal outchan (* thread action *) let callback (req: Http_types.request) outchan = try - debug_print ("Connection from " ^ req#clientAddr); - debug_print ("Received request: " ^ req#path); + Http_getter_logger.log ("Connection from " ^ req#clientAddr); + Http_getter_logger.log ("Received request: " ^ req#uri); (match req#path with | "/help" -> return_help outchan | "/getxml" -> let uri = req#param "uri" in - Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri - ~enc:(parse_enc req) ~patch:(parse_patch req) outchan - | "/getxslt" -> - Http_getter_cache.respond_xsl - ~url:(Http_getter.resolve (req#param "uri")) - ~patch:(parse_patch req) outchan + let fname = Http_getter.getxml uri in (* local name, in cache *) + let remote_name = Http_getter.resolve uri in (* remote name *) + let src_enc = if is_gzip fname then `Gzipped else `Normal in + let enc = parse_enc req in + let fname, cleanup = convert_file ~from_enc:src_enc ~to_enc:enc fname in + let contenc = if enc = `Gzipped then Some "x-gzip" else None in + let patch_fun = + if parse_patch req + then Some (patch_fun_for uri remote_name) + else None + in + let gunzip = (enc = `Gzipped) in + (try + Http_getter_common.return_file + ~fname ~contype:"text/xml" ?contenc ?patch_fun ~gunzip + ~via_http:true ~enc outchan; + with exn -> cleanup (); raise exn); + cleanup () + | "/getxslt" -> respond_xslt (parse_patch req) (req#param "uri") outchan | "/getdtd" -> - Http_getter_cache.respond_dtd ~patch:(parse_patch req) - ~url:(sprintf "%s/%s" - (Helm_registry.get "getter.dtd_dir") (req#param "uri")) - outchan + let fname = Http_getter.getdtd (req#param "uri") in + respond_dtd (parse_patch req) fname outchan | "/resolve" -> return_resolve (req#param "uri") outchan - | "/register" -> - Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url"); - return_html_msg "Register done" outchan | "/clean_cache" -> Http_getter.clean_cache (); return_html_msg "Done." outchan - | "/update" -> - Http_getter_env.reload (); (* reload servers list from servers file *) - send_log_to (fun logger -> Http_getter.update ~logger ()) outchan - | "/list_servers" -> return_list_servers outchan - | "/add_server" -> - let name = req#param "url" in - let position = parse_position req in - let prepend = - sprintf "Added server %s in position %d)
\n" name position - in - send_log_to ~prepend - (fun logger -> Http_getter.add_server ~logger ~position name) outchan - | "/remove_server" -> - let position = parse_position req in - if not (Http_getter.has_server position) then - raise (Bad_request (sprintf "no server with position %d" position)) - else - let prepend = - sprintf "Removed server at position %d
\n" position - in - send_log_to ~prepend - (fun logger -> Http_getter.remove_server ~logger position) outchan | "/getalluris" -> return_all_xml_uris (parse_output_format "getalluris" req) outchan - | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan | "/ls" -> - return_ls (parse_ls_uri req) (parse_output_format "ls" req) outchan + return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan | "/getempty" -> Http_daemon.respond ~body:Http_getter_const.empty_xml outchan | invalid_request -> - Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan); - debug_print "Done!\n" + Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) + outchan); + Http_getter_logger.log "Done!\n" with | Http_types.Param_not_found attr_name -> let msg = sprintf "Parameter '%s' is missing" attr_name in log_failure msg; - return_400 msg outchan + return_400 ("bad_request", msg) msg outchan | Bad_request msg -> log_failure msg; - return_html_error msg outchan + return_html_error ("bad_request", msg) msg outchan | Internal_error msg -> log_failure msg; - return_html_internal_error msg outchan - | Shell.Subprocess_error l -> - let msgs = - List.map - (fun (cmd, code) -> - sprintf "Command '%s' returned %s" cmd (string_of_proc_status code)) - l - in - log_failure (String.concat ", " msgs); - return_html_internal_error (String.concat "
\n" msgs) outchan - | exc -> - let msg = "Uncaught exception: " ^ (Printexc.to_string exc) in - log_failure msg; - return_html_error msg outchan + return_html_internal_error ("internal_error", msg) msg outchan + | exn -> + let msg = "uncaught exception: " ^ (Printexc.to_string exn) in + (match exn with + | Http_getter_types.Key_not_found uri -> + return_html_error ("key_not_found", uri) msg outchan + | _ -> + log_failure msg; + return_html_error ("uncaught_exception", msg) msg outchan) - (* Main *) +let batch_update = ref false +let args = [ ] + + (* Main *) let main () = + Arg.parse args (fun _-> ()) "http_getter honors the following options:\n"; Helm_registry.load_from configuration_file; - Http_getter_env.reload (); + Http_getter.init (); print_string (Http_getter_env.env_to_string ()); flush stdout; - at_exit Http_getter.close_maps; Sys.catch_break true; + let d_spec = Http_daemon.daemon_spec + ~mode:`Thread ~timeout:(Some 600) + ~port:(Lazy.force Http_getter_env.port) + ~callback:callback () + in try - Http_daemon.start' ~mode:`Thread - ~timeout:(Some 600) ~port:(Helm_registry.get_int "getter.port") - callback - with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) + Http_daemon.main d_spec + with Sys.Break -> () let _ = main ()