X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fmain.ml;h=3117a85c991eed1be6c3cf81d14d2048cfe45683;hb=4167cea65ca58897d1a3dbb81ff95de5074700cc;hp=7722a2cdce135626a1d883a8e0c0d62deaf8e7d2;hpb=024e92b8b3bf29e41ce50004c37d884baa1db847;p=helm.git diff --git a/helm/http_getter/main.ml b/helm/http_getter/main.ml index 7722a2cdc..3117a85c9 100644 --- a/helm/http_getter/main.ml +++ b/helm/http_getter/main.ml @@ -1,41 +1,38 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ +(* Copyright (C) 2003-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ *) open Printf open Http_getter_common +open Http_getter_const open Http_getter_misc open Http_getter_types -open Http_getter_debugger (* constants *) -let configuration_file = "/projects/helm/etc/http_getter.conf.xml" +let configuration_file = BuildTimeOpts.conffile let common_headers = [ "Cache-Control", "no-cache"; @@ -45,14 +42,14 @@ let common_headers = [ (* HTTP queries argument parsing *) - (* parse encoding ("format" parameter), default is Enc_normal *) + (* parse encoding ("format" parameter), default is `Normal *) let parse_enc (req: Http_types.request) = try (match req#param "format" with - | "normal" -> Enc_normal - | "gz" -> Enc_gzipped + | "normal" -> `Normal + | "gz" -> `Gzipped | s -> raise (Bad_request ("Invalid format: " ^ s))) - with Http_types.Param_not_found _ -> Enc_normal + with Http_types.Param_not_found _ -> `Normal (* parse "patch_dtd" parameter, default is true *) let parse_patch (req: Http_types.request) = @@ -64,54 +61,59 @@ let parse_patch (req: Http_types.request) = with Http_types.Param_not_found _ -> true (* parse output format ("format" parameter), no default value *) -let parse_output_format (req: Http_types.request) = +let parse_output_format meth (req: Http_types.request) = match req#param "format" with - | s when String.lowercase s = "txt" -> Fmt_text - | s when String.lowercase s = "xml" -> Fmt_xml - | s -> raise (Bad_request ("Invalid /ls format: " ^ s)) - - (* parse "baseuri" format for /ls method, no default value *) -let parse_ls_uri = - let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in - let trailing_slash_RE = Pcre.regexp "/+$" in - let wrong_uri uri = - raise (Bad_request ("Invalid /ls baseuri: " ^ uri)) + | s when String.lowercase s = "txt" -> `Text + | s when String.lowercase s = "xml" -> `Xml + | s -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ s)) + +let xml_escape = Netencoding.Html.encode ~in_enc:`Enc_utf8 () + +let html_tag ?exn () = + let xml_decl = "\n" in + match exn with + | Some (exn, arg) -> + let (exn, arg) = (xml_escape exn, xml_escape arg) in + sprintf + ("%s\n") + xml_decl xhtml_ns helm_ns exn arg + | None -> + sprintf "%s\n" + xml_decl xhtml_ns helm_ns + +let mk_return_fun pp_fun contype msg outchan = + Http_daemon.respond + ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan + +let pp_msg s = sprintf "%s%s" (html_tag ()) s +let null_pp s = s + +let return_html_error exn = + let pp_error s = + sprintf + ("%s\nHttp Getter error: %s" + ^^ "") + (html_tag ~exn ()) s in - fun (req: Http_types.request) -> - let baseuri = req#param "baseuri" in - try - let subs = - Pcre.extract ~rex:parse_ls_RE - (Pcre.replace ~rex:trailing_slash_RE baseuri) - in - (match (subs.(1), subs.(2)) with - | "cic", uri -> Cic uri - | "theory", uri -> Theory uri - | _ -> wrong_uri baseuri) - with Not_found -> wrong_uri baseuri - - (* parse "position" argument, default is 0 *) -let parse_position (req: Http_types.request) = - try - let res = int_of_string (req#param "position") in - if res < 0 then - raise (Failure "int_of_string"); - res - with - | Http_types.Param_not_found _ -> 0 - | Failure "int_of_string" -> - raise (Bad_request - (sprintf "position must be a non negative integer (%s given)" - (req#param "position"))) - -let parse_rdf_class (req: Http_types.request) = - match req#param "class" with - | "forward" -> `Forward - | "backward" -> `Backward - | c -> raise (Bad_request ("Invalid RDF class: " ^ c)) - -let return_all_foo_uris doctype uris outchan = - Http_daemon.send_basic_headers ~code:200 outchan; + mk_return_fun pp_error "text/xml" + +let return_html_internal_error exn = + let pp_internal_error s = + sprintf + ("%s\nHttp Getter Internal error: %s" + ^^ "") + (html_tag ~exn ()) s + in + mk_return_fun pp_internal_error "text/xml" + +let return_html_msg = mk_return_fun pp_msg "text/xml" +let return_html_raw = mk_return_fun null_pp "text/xml" +let return_xml_raw = mk_return_fun null_pp "text/xml" +let return_400 exn body = return_html_error exn body + +let return_all_uris doctype uris outchan = + Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_headers common_headers outchan; Http_daemon.send_CRLF outchan; @@ -132,16 +134,23 @@ let return_all_foo_uris doctype uris outchan = uris; output_string outchan (sprintf "\n" doctype) -let return_all_xml_uris outchan = - return_all_foo_uris "alluris" (Http_getter.getalluris ()) outchan -let return_all_rdf_uris classs outchan = - return_all_foo_uris "allrdfuris" (Http_getter.getallrdfuris classs) outchan +let return_all_xml_uris fmt outchan = + let uris = Http_getter.getalluris () in + match fmt with + | `Text -> + let buf = Buffer.create 10240 in + List.iter (bprintf buf "%s\n") uris ; + let body = Buffer.contents buf in + Http_daemon.respond + ~headers:(("Content-Type", "text/plain") :: common_headers) + ~body outchan + | `Xml -> return_all_uris "alluris" uris outchan -let return_ls xmluri fmt outchan = - let ls_items = Http_getter.ls xmluri in +let return_ls regexp fmt outchan = + let ls_items = Http_getter.ls regexp in let buf = Buffer.create 10240 in (match fmt with - | Fmt_text -> + | `Text -> List.iter (function | Ls_section dir -> bprintf buf "dir, %s\n" dir @@ -152,7 +161,7 @@ let return_ls xmluri fmt outchan = (string_of_ls_flag obj.body) (string_of_ls_flag obj.proof_tree)) ls_items - | Fmt_xml -> + | `Xml -> Buffer.add_string buf "\n"; bprintf buf "\n" (Lazy.force Http_getter_env.my_own_url); @@ -187,113 +196,157 @@ let return_resolve uri outchan = return_xml_raw (sprintf "\n" (Http_getter.resolve uri)) outchan - with Unresolvable_URI uri -> - return_xml_raw "\n" outchan - -let return_list_servers outchan = - return_html_raw - (sprintf "\n%s\n
" - (String.concat "\n" - (List.map - (fun (pos, server) -> - sprintf "%d%s" pos server) - (Http_getter.list_servers ())))) - outchan + with + | Unresolvable_URI _ -> return_xml_raw "\n" outchan + | Key_not_found _ -> return_xml_raw "\n" outchan + +let log_failure msg = Http_getter_logger.log ("Request not fulfilled: " ^ msg) + +let convert_file ~from_enc ~to_enc fname = + let remove f = fun () -> if Sys.file_exists f then Sys.remove f in + match from_enc, to_enc with + | `Normal, `Normal + | `Gzipped, `Gzipped -> fname, (fun () -> ()) + | `Normal, `Gzipped -> + let tmp = Http_getter_misc.tempfile () in + Http_getter_misc.gzip ~keep:true ~output:tmp fname; + tmp, remove tmp + | `Gzipped, `Normal -> + let tmp = Http_getter_misc.tempfile () in + Http_getter_misc.gunzip ~keep:true ~output:tmp fname; + tmp, remove tmp + +let is_gzip fname = Http_getter_misc.extension fname = ".gz" + +let patch_fun_for uri url = + let xmlbases = + if Http_getter_common.is_theory_uri uri then + Some (Filename.dirname uri, Filename.dirname url) + else + None + in + Http_getter_common.patch_xml ?xmlbases ~via_http:true () + +let respond_dtd patch_dtd fname outchan = + let via_http = false in + let patch_fun = + if patch_dtd then Some (Http_getter_common.patch_dtd ~via_http ()) + else None + in + Http_getter_common.return_file ~via_http:true ~fname ~contype:"text/plain" + ~gunzip:false ?patch_fun ~enc:`Normal outchan + +(* let respond_xsl + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = + let patch_fun = + if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) + in + let fname = tempfile () in + finally (fun () -> Sys.remove fname) (lazy ( + wget ~output:fname url; + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan + )) *) +(* | "/getxslt" -> + Http_getter_cache.respond_xsl + ~url:(Http_getter.resolve (req#param "uri")) + ~patch:(parse_patch req) outchan *) + +let respond_xslt patch_xslt xslt_name outchan = + let fname = Http_getter.getxslt xslt_name in + let patch_fun = + if patch_xslt then Some (Http_getter_common.patch_xsl ~via_http:true ()) + else None + in + Http_getter_common.return_file ~fname ~contype:"text/xml" ?patch_fun + ~gunzip:false ~via_http:true ~enc:`Normal outchan (* thread action *) let callback (req: Http_types.request) outchan = try - debug_print ("Connection from " ^ req#clientAddr); - debug_print ("Received request: " ^ req#path); + Http_getter_logger.log ("Connection from " ^ req#clientAddr); + Http_getter_logger.log ("Received request: " ^ req#uri); (match req#path with | "/help" -> return_help outchan | "/getxml" -> let uri = req#param "uri" in - Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri - ~enc:(parse_enc req) ~patch:(parse_patch req) outchan - | "/getxslt" -> - Http_getter_cache.respond_xsl - ~url:(Http_getter.resolve (req#param "uri")) - ~patch:(parse_patch req) outchan + let fname = Http_getter.getxml uri in (* local name, in cache *) + let remote_name = Http_getter.resolve uri in (* remote name *) + let src_enc = if is_gzip fname then `Gzipped else `Normal in + let enc = parse_enc req in + let fname, cleanup = convert_file ~from_enc:src_enc ~to_enc:enc fname in + let contenc = if enc = `Gzipped then Some "x-gzip" else None in + let patch_fun = + if parse_patch req + then Some (patch_fun_for uri remote_name) + else None + in + let gunzip = (enc = `Gzipped) in + (try + Http_getter_common.return_file + ~fname ~contype:"text/xml" ?contenc ?patch_fun ~gunzip + ~via_http:true ~enc outchan; + with exn -> cleanup (); raise exn); + cleanup () + | "/getxslt" -> respond_xslt (parse_patch req) (req#param "uri") outchan | "/getdtd" -> - Http_getter_cache.respond_dtd ~patch:(parse_patch req) - ~url:(sprintf "%s/%s" - (Helm_registry.get "getter.dtd_dir") (req#param "uri")) - outchan + let fname = Http_getter.getdtd (req#param "uri") in + respond_dtd (parse_patch req) fname outchan | "/resolve" -> return_resolve (req#param "uri") outchan - | "/register" -> - Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url"); - return_html_msg "Register done" outchan | "/clean_cache" -> Http_getter.clean_cache (); return_html_msg "Done." outchan - | "/update" -> - Http_getter_env.reload (); (* reload servers list from servers file *) - let log = Http_getter.update () in - return_html_msg (HelmLogger.html_of_html_msg log) outchan - | "/list_servers" -> return_list_servers outchan - | "/add_server" -> - let name = req#param "url" in - let position = parse_position req in - let log = Http_getter.add_server ~position name in - return_html_msg - (sprintf "Added server %s in position %d)
\n%s" - name position (HelmLogger.html_of_html_msg log)) - outchan - | "/remove_server" -> - let position = parse_position req in - let log = - try - Http_getter.remove_server position - with Invalid_argument _ -> - raise (Bad_request (sprintf "no server with position %d" position)) - in - return_html_msg - (sprintf "Removed server at position %d
\n%s" - position (HelmLogger.html_of_html_msg log)) - outchan - | "/getalluris" -> return_all_xml_uris outchan - | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan - | "/ls" -> return_ls (parse_ls_uri req) (parse_output_format req) outchan + | "/getalluris" -> + return_all_xml_uris (parse_output_format "getalluris" req) outchan + | "/ls" -> + return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan | "/getempty" -> Http_daemon.respond ~body:Http_getter_const.empty_xml outchan | invalid_request -> - Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan); - debug_print "Done!\n" + Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) + outchan); + Http_getter_logger.log "Done!\n" with | Http_types.Param_not_found attr_name -> - return_400 (sprintf "Parameter '%s' is missing" attr_name) outchan - | Bad_request msg -> return_html_error msg outchan - | Internal_error msg -> return_html_internal_error msg outchan - | Shell.Subprocess_error l -> - return_html_internal_error - (String.concat "
\n" - (List.map - (fun (cmd, code) -> - sprintf "Command '%s' returned %s" - cmd (string_of_proc_status code)) - l)) - outchan - | exc -> - return_html_error - ("Uncaught exception: " ^ (Printexc.to_string exc)) - outchan + let msg = sprintf "Parameter '%s' is missing" attr_name in + log_failure msg; + return_400 ("bad_request", msg) msg outchan + | Bad_request msg -> + log_failure msg; + return_html_error ("bad_request", msg) msg outchan + | Internal_error msg -> + log_failure msg; + return_html_internal_error ("internal_error", msg) msg outchan + | exn -> + let msg = "uncaught exception: " ^ (Printexc.to_string exn) in + (match exn with + | Http_getter_types.Key_not_found uri -> + return_html_error ("key_not_found", uri) msg outchan + | _ -> + log_failure msg; + return_html_error ("uncaught_exception", msg) msg outchan) - (* Main *) +let batch_update = ref false +let args = [ ] + + (* Main *) let main () = + Arg.parse args (fun _-> ()) "http_getter honors the following options:\n"; Helm_registry.load_from configuration_file; - Http_getter_env.reload (); + Http_getter.init (); print_string (Http_getter_env.env_to_string ()); flush stdout; - at_exit Http_getter.close_maps; Sys.catch_break true; + let d_spec = Http_daemon.daemon_spec + ~mode:`Thread ~timeout:(Some 600) + ~port:(Lazy.force Http_getter_env.port) + ~callback:callback () + in try - Http_daemon.start' ~mode:`Thread - ~timeout:(Some 600) ~port:(Helm_registry.get_int "getter.port") - callback - with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) + Http_daemon.main d_spec + with Sys.Break -> () let _ = main ()