X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fmain.ml;h=3117a85c991eed1be6c3cf81d14d2048cfe45683;hb=4167cea65ca58897d1a3dbb81ff95de5074700cc;hp=5f7e8cd93582e94a90dd9bf27619266031451ab8;hpb=52d40595f2cc59fb58a9c544041fccc59f223a58;p=helm.git diff --git a/helm/http_getter/main.ml b/helm/http_getter/main.ml index 5f7e8cd93..3117a85c9 100644 --- a/helm/http_getter/main.ml +++ b/helm/http_getter/main.ml @@ -1,29 +1,26 @@ -(* - * Copyright (C) 2003-2004: - * Stefano Zacchiroli - * for the HELM Team http://helm.cs.unibo.it/ +(* Copyright (C) 2003-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * This file is part of HELM, an Hypertextual, Electronic - * Library of Mathematics, developed at the Computer Science - * Department, University of Bologna, Italy. - * - * HELM is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * HELM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with HELM; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - * - * For details, see the HELM World-Wide-Web page, - * http://helm.cs.unibo.it/ + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ *) open Printf @@ -70,26 +67,6 @@ let parse_output_format meth (req: Http_types.request) = | s when String.lowercase s = "xml" -> `Xml | s -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ s)) - (* parse "position" argument, default is 0 *) -let parse_position (req: Http_types.request) = - try - let res = int_of_string (req#param "position") in - if res < 0 then - raise (Failure "int_of_string"); - res - with - | Http_types.Param_not_found _ -> 0 - | Failure "int_of_string" -> - raise (Bad_request - (sprintf "position must be a non negative integer (%s given)" - (req#param "position"))) - -let parse_rdf_class (req: Http_types.request) = - match req#param "class" with - | "forward" -> `Forward - | "backward" -> `Backward - | c -> raise (Bad_request ("Invalid RDF class: " ^ c)) - let xml_escape = Netencoding.Html.encode ~in_enc:`Enc_utf8 () let html_tag ?exn () = @@ -98,7 +75,8 @@ let html_tag ?exn () = | Some (exn, arg) -> let (exn, arg) = (xml_escape exn, xml_escape arg) in sprintf - "%s\n" + ("%s\n") xml_decl xhtml_ns helm_ns exn arg | None -> sprintf "%s\n" @@ -107,26 +85,34 @@ let html_tag ?exn () = let mk_return_fun pp_fun contype msg outchan = Http_daemon.respond ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan + let pp_msg s = sprintf "%s%s" (html_tag ()) s let null_pp s = s + let return_html_error exn = let pp_error s = - sprintf "%s\nHttp Getter error: %s" + sprintf + ("%s\nHttp Getter error: %s" + ^^ "") (html_tag ~exn ()) s in mk_return_fun pp_error "text/xml" + let return_html_internal_error exn = let pp_internal_error s = - sprintf "%s\nHttp Getter Internal error: %s" + sprintf + ("%s\nHttp Getter Internal error: %s" + ^^ "") (html_tag ~exn ()) s in mk_return_fun pp_internal_error "text/xml" + let return_html_msg = mk_return_fun pp_msg "text/xml" let return_html_raw = mk_return_fun null_pp "text/xml" let return_xml_raw = mk_return_fun null_pp "text/xml" let return_400 exn body = return_html_error exn body -let return_all_foo_uris doctype uris outchan = +let return_all_uris doctype uris outchan = Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_headers common_headers outchan; @@ -149,20 +135,16 @@ let return_all_foo_uris doctype uris outchan = output_string outchan (sprintf "\n" doctype) let return_all_xml_uris fmt outchan = - let uris = Http_getter.getalluris () in + let uris = Http_getter.getalluris () in match fmt with - | `Text -> + | `Text -> let buf = Buffer.create 10240 in - List.iter (bprintf buf "%s\n") uris ; - let body = Buffer.contents buf in - Http_daemon.respond - ~headers:(("Content-Type", "text/plain") :: common_headers) - ~body outchan - | `Xml -> - return_all_foo_uris "alluris" uris outchan - -let return_all_rdf_uris classs outchan = - return_all_foo_uris "allrdfuris" (Http_getter.getallrdfuris classs) outchan + List.iter (bprintf buf "%s\n") uris ; + let body = Buffer.contents buf in + Http_daemon.respond + ~headers:(("Content-Type", "text/plain") :: common_headers) + ~body outchan + | `Xml -> return_all_uris "alluris" uris outchan let return_ls regexp fmt outchan = let ls_items = Http_getter.ls regexp in @@ -218,96 +200,105 @@ let return_resolve uri outchan = | Unresolvable_URI _ -> return_xml_raw "\n" outchan | Key_not_found _ -> return_xml_raw "\n" outchan -let return_list_servers outchan = - return_html_raw - (sprintf "%s\n%s\n
" - (html_tag ()) - (String.concat "\n" - (List.map - (fun (pos, server) -> - sprintf "%d%s" pos server) - (Http_getter.list_servers ())))) - outchan - let log_failure msg = Http_getter_logger.log ("Request not fulfilled: " ^ msg) - (** given an action (i.e. a function which expects a logger and do something - * using it as a logger), perform it sending its output incrementally to the - * given output channel. Response is sent embedded in an HTML document. - * Channel is closed afterwards. *) -let send_log_to ?prepend action outchan = - Http_daemon.send_basic_headers ~code:(`Code 200) outchan; - Http_daemon.send_header "Content-Type" "text/xml" outchan; - Http_daemon.send_CRLF outchan; - output_string outchan (sprintf "%s\n" (html_tag ())); - flush outchan; - (match prepend with - | None -> () - | Some text -> output_string outchan text; flush outchan); - let logger tag = - output_string outchan (HelmLogger.html_of_html_tag tag); - flush outchan +let convert_file ~from_enc ~to_enc fname = + let remove f = fun () -> if Sys.file_exists f then Sys.remove f in + match from_enc, to_enc with + | `Normal, `Normal + | `Gzipped, `Gzipped -> fname, (fun () -> ()) + | `Normal, `Gzipped -> + let tmp = Http_getter_misc.tempfile () in + Http_getter_misc.gzip ~keep:true ~output:tmp fname; + tmp, remove tmp + | `Gzipped, `Normal -> + let tmp = Http_getter_misc.tempfile () in + Http_getter_misc.gunzip ~keep:true ~output:tmp fname; + tmp, remove tmp + +let is_gzip fname = Http_getter_misc.extension fname = ".gz" + +let patch_fun_for uri url = + let xmlbases = + if Http_getter_common.is_theory_uri uri then + Some (Filename.dirname uri, Filename.dirname url) + else + None + in + Http_getter_common.patch_xml ?xmlbases ~via_http:true () + +let respond_dtd patch_dtd fname outchan = + let via_http = false in + let patch_fun = + if patch_dtd then Some (Http_getter_common.patch_dtd ~via_http ()) + else None + in + Http_getter_common.return_file ~via_http:true ~fname ~contype:"text/plain" + ~gunzip:false ?patch_fun ~enc:`Normal outchan + +(* let respond_xsl + ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan + = + let patch_fun = + if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) in - action logger; - output_string outchan "\n"; - close_out outchan + let fname = tempfile () in + finally (fun () -> Sys.remove fname) (lazy ( + wget ~output:fname url; + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan + )) *) +(* | "/getxslt" -> + Http_getter_cache.respond_xsl + ~url:(Http_getter.resolve (req#param "uri")) + ~patch:(parse_patch req) outchan *) + +let respond_xslt patch_xslt xslt_name outchan = + let fname = Http_getter.getxslt xslt_name in + let patch_fun = + if patch_xslt then Some (Http_getter_common.patch_xsl ~via_http:true ()) + else None + in + Http_getter_common.return_file ~fname ~contype:"text/xml" ?patch_fun + ~gunzip:false ~via_http:true ~enc:`Normal outchan (* thread action *) let callback (req: Http_types.request) outchan = try Http_getter_logger.log ("Connection from " ^ req#clientAddr); - Http_getter_logger.log ("Received request: " ^ req#path); + Http_getter_logger.log ("Received request: " ^ req#uri); (match req#path with | "/help" -> return_help outchan | "/getxml" -> let uri = req#param "uri" in - Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri - ~enc:(parse_enc req) ~patch:(parse_patch req) outchan - | "/getxslt" -> - Http_getter_cache.respond_xsl - ~url:(Http_getter.resolve (req#param "uri")) - ~patch:(parse_patch req) outchan + let fname = Http_getter.getxml uri in (* local name, in cache *) + let remote_name = Http_getter.resolve uri in (* remote name *) + let src_enc = if is_gzip fname then `Gzipped else `Normal in + let enc = parse_enc req in + let fname, cleanup = convert_file ~from_enc:src_enc ~to_enc:enc fname in + let contenc = if enc = `Gzipped then Some "x-gzip" else None in + let patch_fun = + if parse_patch req + then Some (patch_fun_for uri remote_name) + else None + in + let gunzip = (enc = `Gzipped) in + (try + Http_getter_common.return_file + ~fname ~contype:"text/xml" ?contenc ?patch_fun ~gunzip + ~via_http:true ~enc outchan; + with exn -> cleanup (); raise exn); + cleanup () + | "/getxslt" -> respond_xslt (parse_patch req) (req#param "uri") outchan | "/getdtd" -> - Http_getter_cache.respond_dtd ~patch:(parse_patch req) - ~url:(sprintf "%s/%s" - (Lazy.force Http_getter_env.dtd_dir) (req#param "uri")) - outchan + let fname = Http_getter.getdtd (req#param "uri") in + respond_dtd (parse_patch req) fname outchan | "/resolve" -> return_resolve (req#param "uri") outchan - | "/register" -> - Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url"); - return_html_msg "Register done" outchan - | "/unregister" -> - Http_getter.unregister (req#param "uri"); - return_html_msg "Unregister done" outchan | "/clean_cache" -> Http_getter.clean_cache (); return_html_msg "Done." outchan - | "/update" -> - Http_getter_env.reload (); (* reload servers list from servers file *) - send_log_to (fun logger -> Http_getter.update ~logger ()) outchan - | "/list_servers" -> return_list_servers outchan - | "/add_server" -> - let name = req#param "url" in - let position = parse_position req in - let prepend = - sprintf "Added server %s in position %d)
\n" name position - in - send_log_to ~prepend - (fun logger -> Http_getter.add_server ~logger ~position name) outchan - | "/remove_server" -> - let position = parse_position req in - if not (Http_getter.has_server position) then - raise (Bad_request (sprintf "no server with position %d" position)) - else - let prepend = - sprintf "Removed server at position %d
\n" position - in - send_log_to ~prepend - (fun logger -> Http_getter.remove_server ~logger position) outchan | "/getalluris" -> return_all_xml_uris (parse_output_format "getalluris" req) outchan - | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan | "/ls" -> return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan | "/getempty" -> @@ -327,20 +318,9 @@ let callback (req: Http_types.request) outchan = | Internal_error msg -> log_failure msg; return_html_internal_error ("internal_error", msg) msg outchan - | Shell.Subprocess_error l -> - let msgs = - List.map - (fun (cmd, code) -> - sprintf "Command '%s' returned %s" cmd (string_of_proc_status code)) - l - in - let msg = String.concat ", " msgs in - log_failure msg; - return_html_internal_error ("subprocess_error", msg) - (String.concat "
\n" msgs) outchan - | exc -> - let msg = "uncaught exception: " ^ (Printexc.to_string exc) in - (match exc with + | exn -> + let msg = "uncaught exception: " ^ (Printexc.to_string exn) in + (match exn with | Http_getter_types.Key_not_found uri -> return_html_error ("key_not_found", uri) msg outchan | _ -> @@ -349,33 +329,24 @@ let callback (req: Http_types.request) outchan = let batch_update = ref false -let args = [ - ("-update", - Arg.Unit (fun () -> batch_update := true), - "\tupdate maps and exit"); -] +let args = [ ] (* Main *) let main () = - Arg.parse args (fun _->()) "http_getter honors the following options:\n"; + Arg.parse args (fun _-> ()) "http_getter honors the following options:\n"; Helm_registry.load_from configuration_file; Http_getter.init (); print_string (Http_getter_env.env_to_string ()); flush stdout; - if !batch_update then (* batch mode: performs update and exit *) - Http_getter.update ~logger:Http_getter.stdout_logger () - else begin (* daemon mode: start http daemon *) - at_exit Http_getter.close_maps; - Sys.catch_break true; - let d_spec = Http_daemon.daemon_spec - ~mode:`Thread ~timeout:(Some 600) - ~port:(Lazy.force Http_getter_env.port) - ~callback:callback () - in - try - Http_daemon.main d_spec - with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) - end + Sys.catch_break true; + let d_spec = Http_daemon.daemon_spec + ~mode:`Thread ~timeout:(Some 600) + ~port:(Lazy.force Http_getter_env.port) + ~callback:callback () + in + try + Http_daemon.main d_spec + with Sys.Break -> () let _ = main ()