(* Copyright (C) 2003-2005, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * HELM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HELM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, * http://helm.cs.unibo.it/ *) open Printf open Http_getter_common open Http_getter_const open Http_getter_misc open Http_getter_types (* constants *) let configuration_file = BuildTimeOpts.conffile let common_headers = [ "Cache-Control", "no-cache"; "Pragma", "no-cache"; "Expires", "0" ] (* HTTP queries argument parsing *) (* parse encoding ("format" parameter), default is `Normal *) let parse_enc (req: Http_types.request) = try (match req#param "format" with | "normal" -> `Normal | "gz" -> `Gzipped | s -> raise (Bad_request ("Invalid format: " ^ s))) with Http_types.Param_not_found _ -> `Normal (* parse "patch_dtd" parameter, default is true *) let parse_patch (req: Http_types.request) = try (match req#param "patch_dtd" with | s when String.lowercase s = "yes" -> true | s when String.lowercase s = "no" -> false | s -> raise (Bad_request ("Invalid patch_dtd value: " ^ s))) with Http_types.Param_not_found _ -> true (* parse output format ("format" parameter), no default value *) let parse_output_format meth (req: Http_types.request) = match req#param "format" with | s when String.lowercase s = "txt" -> `Text | s when String.lowercase s = "xml" -> `Xml | s -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ s)) let xml_escape = Netencoding.Html.encode ~in_enc:`Enc_utf8 () let html_tag ?exn () = let xml_decl = "\n" in match exn with | Some (exn, arg) -> let (exn, arg) = (xml_escape exn, xml_escape arg) in sprintf ("%s\n") xml_decl xhtml_ns helm_ns exn arg | None -> sprintf "%s\n" xml_decl xhtml_ns helm_ns let mk_return_fun pp_fun contype msg outchan = Http_daemon.respond ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan let pp_msg s = sprintf "%s%s" (html_tag ()) s let null_pp s = s let return_html_error exn = let pp_error s = sprintf ("%s\nHttp Getter error: %s" ^^ "") (html_tag ~exn ()) s in mk_return_fun pp_error "text/xml" let return_html_internal_error exn = let pp_internal_error s = sprintf ("%s\nHttp Getter Internal error: %s" ^^ "") (html_tag ~exn ()) s in mk_return_fun pp_internal_error "text/xml" let return_html_msg = mk_return_fun pp_msg "text/xml" let return_html_raw = mk_return_fun null_pp "text/xml" let return_xml_raw = mk_return_fun null_pp "text/xml" let return_400 exn body = return_html_error exn body let return_all_uris doctype uris outchan = Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_headers common_headers outchan; Http_daemon.send_CRLF outchan; output_string outchan (sprintf " <%s> " doctype (Lazy.force Http_getter_env.my_own_url) doctype doctype); List.iter (fun uri -> output_string outchan (sprintf "\t\n" uri)) uris; output_string outchan (sprintf "\n" doctype) let return_all_xml_uris fmt outchan = let uris = Http_getter.getalluris () in match fmt with | `Text -> let buf = Buffer.create 10240 in List.iter (bprintf buf "%s\n") uris ; let body = Buffer.contents buf in Http_daemon.respond ~headers:(("Content-Type", "text/plain") :: common_headers) ~body outchan | `Xml -> return_all_uris "alluris" uris outchan let return_ls regexp fmt outchan = let ls_items = Http_getter.ls regexp in let buf = Buffer.create 10240 in (match fmt with | `Text -> List.iter (function | Ls_section dir -> bprintf buf "dir, %s\n" dir | Ls_object obj -> bprintf buf "object, %s, <%s,%s,%s,%s>\n" obj.uri (if obj.ann then "YES" else "NO") (string_of_ls_flag obj.types) (string_of_ls_flag obj.body) (string_of_ls_flag obj.proof_tree)) ls_items | `Xml -> Buffer.add_string buf "\n"; bprintf buf "\n" (Lazy.force Http_getter_env.my_own_url); Buffer.add_string buf "\n"; List.iter (function | Ls_section dir -> bprintf buf "
%s
\n" dir | Ls_object obj -> bprintf buf " \t \t \t \t " obj.uri (if obj.ann then "YES" else "NO") (string_of_ls_flag obj.types) (string_of_ls_flag obj.body) (string_of_ls_flag obj.proof_tree)) ls_items; Buffer.add_string buf "
\n"); let body = Buffer.contents buf in Http_daemon.respond ~headers:(("Content-Type", "text/plain") :: common_headers) ~body outchan let return_help outchan = return_html_raw (Http_getter.help ()) outchan let return_resolve writable uri outchan = try return_xml_raw (sprintf "\n" (Http_getter.resolve ~writable uri)) outchan with | Unresolvable_URI _ -> return_xml_raw "\n" outchan | Key_not_found _ -> return_xml_raw "\n" outchan let log_failure msg = Http_getter_logger.log ("Request not fulfilled: " ^ msg) let convert_file ~from_enc ~to_enc fname = let remove f = fun () -> if Sys.file_exists f then Sys.remove f in match from_enc, to_enc with | `Normal, `Normal | `Gzipped, `Gzipped -> fname, (fun () -> ()) | `Normal, `Gzipped -> let tmp = Http_getter_misc.tempfile () in Http_getter_misc.gzip ~keep:true ~output:tmp fname; tmp, remove tmp | `Gzipped, `Normal -> let tmp = Http_getter_misc.tempfile () in Http_getter_misc.gunzip ~keep:true ~output:tmp fname; tmp, remove tmp let is_gzip fname = Http_getter_misc.extension fname = ".gz" let patch_fun_for uri url = let xmlbases = if Http_getter_common.is_theory_uri uri then Some (Filename.dirname uri, Filename.dirname url) else None in Http_getter_common.patch_xml ?xmlbases ~via_http:true () let respond_dtd patch_dtd fname outchan = let via_http = false in let patch_fun = if patch_dtd then Some (Http_getter_common.patch_dtd ~via_http ()) else None in Http_getter_common.return_file ~via_http:true ~fname ~contype:"text/plain" ~gunzip:false ?patch_fun ~enc:`Normal outchan (* let respond_xsl ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan = let patch_fun = if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x) in let fname = tempfile () in finally (fun () -> Sys.remove fname) (lazy ( wget ~output:fname url; return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan )) *) (* | "/getxslt" -> Http_getter_cache.respond_xsl ~url:(Http_getter.resolve (req#param "uri")) ~patch:(parse_patch req) outchan *) let respond_xslt patch_xslt xslt_name outchan = let fname = Http_getter.getxslt xslt_name in let patch_fun = if patch_xslt then Some (Http_getter_common.patch_xsl ~via_http:true ()) else None in Http_getter_common.return_file ~fname ~contype:"text/xml" ?patch_fun ~gunzip:false ~via_http:true ~enc:`Normal outchan (* thread action *) let callback ((req: Http_types.request), outchan) = try Http_getter_logger.log ("Connection from " ^ req#clientAddr); Http_getter_logger.log ("Received request: " ^ req#uri); (match req#path with | "/help" -> return_help outchan | "/getxml" -> let uri = req#param "uri" in let fname = Http_getter.getxml uri in (* local name, in cache *) (* remote name *) let remote_name = Http_getter.resolve ~writable:false uri in let src_enc = if is_gzip fname then `Gzipped else `Normal in let enc = parse_enc req in let fname, cleanup = convert_file ~from_enc:src_enc ~to_enc:enc fname in let contenc = if enc = `Gzipped then Some "x-gzip" else None in let patch_fun = if parse_patch req then Some (patch_fun_for uri remote_name) else None in let gunzip = (enc = `Gzipped) in (try Http_getter_common.return_file ~fname ~contype:"text/xml" ?contenc ?patch_fun ~gunzip ~via_http:true ~enc outchan; with exn -> cleanup (); raise exn); cleanup () | "/getxslt" -> respond_xslt (parse_patch req) (req#param "uri") outchan | "/getdtd" -> let fname = Http_getter.getdtd (req#param "uri") in respond_dtd (parse_patch req) fname outchan | "/resolve" -> let writable = match req#param ~default:"false" "writable" with | "true" -> true | _ -> false in return_resolve writable (req#param "uri") outchan | "/clean_cache" -> Http_getter.clean_cache (); return_html_msg "Done." outchan | "/getalluris" -> return_all_xml_uris (parse_output_format "getalluris" req) outchan | "/ls" -> return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan | "/getempty" -> Http_daemon.respond ~body:Http_getter_const.empty_xml outchan | invalid_request -> Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) outchan); Http_getter_logger.log "Done!\n" with | Http_types.Param_not_found attr_name -> let msg = sprintf "Parameter '%s' is missing" attr_name in log_failure msg; return_400 ("bad_request", msg) msg outchan | Bad_request msg -> log_failure msg; return_html_error ("bad_request", msg) msg outchan | Internal_error msg -> log_failure msg; return_html_internal_error ("internal_error", msg) msg outchan | exn -> let msg = "uncaught exception: " ^ (Printexc.to_string exn) in (match exn with | Http_getter_types.Key_not_found uri -> return_html_error ("key_not_found", uri) msg outchan | _ -> log_failure msg; return_html_error ("uncaught_exception", msg) msg outchan) let callback req outchan = HExtlib.finally (fun () -> try close_out outchan with Sys_error _ -> ()) callback (req, outchan) let batch_update = ref false let args = [ ] (* Main *) let main () = Arg.parse args (fun _-> ()) "http_getter honors the following options:\n"; Helm_registry.load_from configuration_file; Http_getter.init (); print_string (Http_getter_env.env_to_string ()); flush stdout; Sys.catch_break true; let d_spec = Http_daemon.daemon_spec ~mode:`Thread ~timeout:(Some 600) ~port:(Lazy.force Http_getter_env.port) ~callback:callback () in try Http_daemon.main d_spec with Sys.Break -> () let _ = main ()