(*
 * Copyright (C) 2003-2004:
 *    Stefano Zacchiroli
 *    for the HELM Team http://helm.cs.unibo.it/
 *
 *  This file is part of HELM, an Hypertextual, Electronic
 *  Library of Mathematics, developed at the Computer Science
 *  Department, University of Bologna, Italy.
 *
 *  HELM is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  HELM is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with HELM; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 *  MA  02111-1307, USA.
 *
 *  For details, see the HELM World-Wide-Web page,
 *  http://helm.cs.unibo.it/
 *)

open Printf

open Http_getter_common
open Http_getter_misc
open Http_getter_types

(* NOTE(review): several string literals in this file had lost their embedded
 * markup.  Literals whose number of conversions did not match the number of
 * arguments (so the file could not type-check) were rebuilt, together with
 * the other literals of the same XML document; each one is flagged below and
 * must be confirmed against the DTDs and against version control.  HTML-side
 * literals that still type-check were left with their runtime content
 * unchanged and are flagged too. *)

  (* constants *)

(** Path of the configuration file, fixed at build time. *)
let configuration_file = BuildTimeOpts.conffile

(** Headers added to the URI-list and ls responses to disable caching. *)
let common_headers = [
  "Cache-Control", "no-cache";
  "Pragma", "no-cache";
  "Expires", "0"
]

  (* HTTP queries argument parsing *)

(** Parse the encoding ("format" parameter); default is `Normal.
 * Raises Bad_request on any value other than "normal" or "gz". *)
let parse_enc (req: Http_types.request) =
  try
    (match req#param "format" with
    | "normal" -> `Normal
    | "gz" -> `Gzipped
    | s -> raise (Bad_request ("Invalid format: " ^ s)))
  with Http_types.Param_not_found _ -> `Normal

(** Parse the "patch_dtd" parameter (case insensitive yes/no); default is true.
 * Raises Bad_request on any other value.  The error message reports the value
 * exactly as received.
 * String.lowercase is kept for compatibility with the compiler this file
 * targets; String.lowercase_ascii is its replacement on OCaml >= 4.03. *)
let parse_patch (req: Http_types.request) =
  try
    let raw = req#param "patch_dtd" in
    (match String.lowercase raw with
    | "yes" -> true
    | "no" -> false
    | _ -> raise (Bad_request ("Invalid patch_dtd value: " ^ raw)))
  with Http_types.Param_not_found _ -> true

(** Parse the output format ("format" parameter, case insensitive txt/xml).
 * There is no default: a missing parameter lets Http_types.Param_not_found
 * escape to the caller.  [meth] is only used to build the error message. *)
let parse_output_format meth (req: Http_types.request) =
  let raw = req#param "format" in
  match String.lowercase raw with
  | "txt" -> `Text
  | "xml" -> `Xml
  | _ -> raise (Bad_request ("Invalid /" ^ meth ^ " format: " ^ raw))

(** Parse the "position" argument; default is 0 when the parameter is missing.
 * Raises Bad_request when the value is not a non negative integer. *)
let parse_position (req: Http_types.request) =
  let reject raw =
    raise (Bad_request
      (sprintf "position must be a non negative integer (%s given)" raw))
  in
  let given =
    try Some (req#param "position")
    with Http_types.Param_not_found _ -> None
  in
  match given with
  | None -> 0
  | Some raw ->
      (* Failure _ rather than a literal payload: matching on the message of
       * Failure is fragile (compiler warning 52). *)
      let pos = try int_of_string raw with Failure _ -> reject raw in
      if pos < 0 then reject raw else pos

(** Parse the "class" parameter of /getallrdfuris; no default.
 * Raises Bad_request on any value other than "forward" or "backward". *)
let parse_rdf_class (req: Http_types.request) =
  match req#param "class" with
  | "forward" -> `Forward
  | "backward" -> `Backward
  | c -> raise (Bad_request ("Invalid RDF class: " ^ c))

  (* response helpers *)

(** Build a responder: [pp_fun] renders the message into the body, [contype]
 * is sent as Content-Type. *)
let mk_return_fun pp_fun contype msg outchan =
  Http_daemon.respond
    ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan

(* NOTE(review): the three printers below feed text/html responses but emit
 * bare text ([pp_msg] is an identity sprintf); surrounding HTML markup was
 * presumably lost.  Left unchanged because the intended markup cannot be
 * determined from this file. *)
let pp_error s = sprintf "Http Getter error: %s" s
let pp_internal_error s = sprintf "Http Getter Internal error: %s" s
let pp_msg s = sprintf "%s" s
let null_pp s = s

let return_html_error = mk_return_fun pp_error "text/html"
let return_html_internal_error = mk_return_fun pp_internal_error "text/html"
let return_html_msg = mk_return_fun pp_msg "text/html"
let return_html_raw = mk_return_fun null_pp "text/html"
let return_xml_raw = mk_return_fun null_pp "text/xml"

(** Answer with an HTTP 400 error carrying [body]. *)
let return_400 body outchan =
  Http_daemon.respond_error ~code:(`Code 400) ~body outchan

(** Stream an XML document listing [uris] to [outchan].  [doctype] is used as
 * the DOCTYPE name, as the DTD file name and as the root element name.
 * NOTE(review): the three literals below were rebuilt (the originals applied
 * sprintf to more arguments than they had conversions).  The XML declaration,
 * the DTD location and the uri element are reconstructions; confirm them
 * against the DTD served by /getdtd. *)
let return_all_foo_uris doctype uris outchan =
  Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
  Http_daemon.send_header "Content-Type" "text/xml" outchan;
  Http_daemon.send_headers common_headers outchan;
  Http_daemon.send_CRLF outchan;
  output_string outchan
    (sprintf
       "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\
        <!DOCTYPE %s SYSTEM \"%s/getdtd?uri=%s.dtd\">\n\n\
        <%s>\n"
       doctype (Lazy.force Http_getter_env.my_own_url) doctype doctype);
  List.iter
    (fun uri -> output_string outchan (sprintf "\t<uri value=\"%s\" />\n" uri))
    uris;
  output_string outchan (sprintf "</%s>\n" doctype)

(** Answer /getalluris: one URI per line for `Text, an XML list for `Xml. *)
let return_all_xml_uris fmt outchan =
  let uris = Http_getter.getalluris () in
  match fmt with
  | `Text ->
      let buf = Buffer.create 10240 in
      List.iter (bprintf buf "%s\n") uris;
      let body = Buffer.contents buf in
      Http_daemon.respond
        ~headers:(("Content-Type", "text/plain") :: common_headers)
        ~body outchan
  | `Xml -> return_all_foo_uris "alluris" uris outchan

(** Answer /getallrdfuris for the given RDF class. *)
let return_all_rdf_uris classs outchan =
  return_all_foo_uris "allrdfuris" (Http_getter.getallrdfuris classs) outchan

(** Answer /ls: list the sections and objects matching [regexp], rendered
 * either as plain text or as XML according to [fmt].
 * The Content-Type now follows the format (the XML rendering used to be
 * labelled text/plain, unlike the other XML responses of this file).
 * NOTE(review): the literals of the `Xml branch were rebuilt (two of them had
 * fewer conversions than arguments).  Element and attribute names are
 * reconstructions; confirm them against ls.dtd. *)
let return_ls regexp fmt outchan =
  let ls_items = Http_getter.ls regexp in
  let buf = Buffer.create 10240 in
  let yes_no flag = if flag then "YES" else "NO" in
  let content_type =
    match fmt with
    | `Text -> "text/plain"
    | `Xml -> "text/xml"
  in
  (match fmt with
  | `Text ->
      List.iter
        (function
          | Ls_section dir -> bprintf buf "dir, %s\n" dir
          | Ls_object obj ->
              bprintf buf "object, %s, <%s,%s,%s,%s>\n"
                obj.uri (yes_no obj.ann)
                (string_of_ls_flag obj.types)
                (string_of_ls_flag obj.body)
                (string_of_ls_flag obj.proof_tree))
        ls_items
  | `Xml ->
      Buffer.add_string buf
        "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
      bprintf buf "<!DOCTYPE ls SYSTEM \"%s/getdtd?uri=ls.dtd\">\n"
        (Lazy.force Http_getter_env.my_own_url);
      Buffer.add_string buf "<ls>\n";
      List.iter
        (function
          | Ls_section dir -> bprintf buf "<section>%s</section>\n" dir
          | Ls_object obj ->
              bprintf buf
                "<object name=\"%s\">\n\
                 \t<ann value=\"%s\" />\n\
                 \t<types value=\"%s\" />\n\
                 \t<body value=\"%s\" />\n\
                 \t<proof_tree value=\"%s\" />\n\
                 </object>\n"
                obj.uri (yes_no obj.ann)
                (string_of_ls_flag obj.types)
                (string_of_ls_flag obj.body)
                (string_of_ls_flag obj.proof_tree))
        ls_items;
      Buffer.add_string buf "</ls>\n");
  Http_daemon.respond
    ~headers:(("Content-Type", content_type) :: common_headers)
    ~body:(Buffer.contents buf) outchan

(** Answer /help with the text produced by Http_getter.help. *)
let return_help outchan = return_html_raw (Http_getter.help ()) outchan

(** Answer /resolve: the URL bound to [uri], or an "unresolved" marker when
 * Http_getter.resolve raises Unresolvable_URI.
 * NOTE(review): both XML literals are reconstructions (the first one had no
 * conversion for its argument, the second one was a bare newline); confirm
 * the element names with the clients of /resolve. *)
let return_resolve uri outchan =
  try
    return_xml_raw
      (sprintf "<url value=\"%s\" />\n" (Http_getter.resolve uri)) outchan
  with Unresolvable_URI _ ->
    return_xml_raw "<unresolved />\n" outchan

(** Answer /list_servers with one line per (position, server) pair.
 * NOTE(review): sent as text/html, yet position and server are concatenated
 * without any separator and no markup surrounds the list; table markup was
 * presumably lost.  Runtime content left unchanged. *)
let return_list_servers outchan =
  let rows =
    List.map
      (fun (pos, server) -> sprintf "%d%s" pos server)
      (Http_getter.list_servers ())
  in
  return_html_raw (sprintf "\n%s\n\n" (String.concat "\n" rows)) outchan

(** Log a request that could not be fulfilled. *)
let log_failure msg =
  Http_getter_logger.log ("Request not fulfilled: " ^ msg)

(** Given an action (i.e. a function which expects a logger and does something
 * using it as a logger), perform it sending its output incrementally to the
 * given output channel.  The response is declared as text/html; [prepend],
 * when given, is written before the action output.  The channel is closed
 * afterwards (only when the action returns normally).
 * NOTE(review): the opening and closing literals are bare newlines although
 * the response is meant to be an HTML document; the enclosing tags were
 * presumably lost.  Runtime content left unchanged. *)
let send_log_to ?prepend action outchan =
  Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
  Http_daemon.send_header "Content-Type" "text/html" outchan;
  Http_daemon.send_CRLF outchan;
  output_string outchan "\n";
  flush outchan;
  (match prepend with
  | None -> ()
  | Some text ->
      output_string outchan text;
      flush outchan);
  (* flush after every tag so that the client sees progress immediately *)
  let logger tag =
    output_string outchan (HelmLogger.html_of_html_tag tag);
    flush outchan
  in
  action logger;
  output_string outchan "\n";
  close_out outchan

  (* thread action *)

(** Per-request handler: dispatch on the request path, then turn the
 * exceptions raised while serving into error responses.
 * A missing parameter yields an HTTP 400; Bad_request, Internal_error,
 * Shell.Subprocess_error and any other exception are reported through the
 * HTML error helpers. *)
let callback (req: Http_types.request) outchan =
  try
    Http_getter_logger.log ("Connection from " ^ req#clientAddr);
    Http_getter_logger.log ("Received request: " ^ req#path);
    (match req#path with
    | "/help" -> return_help outchan
    | "/getxml" ->
        let uri = req#param "uri" in
        Http_getter_cache.respond_xml ~url:(Http_getter.resolve uri) ~uri
          ~enc:(parse_enc req) ~patch:(parse_patch req) outchan
    | "/getxslt" ->
        Http_getter_cache.respond_xsl
          ~url:(Http_getter.resolve (req#param "uri"))
          ~patch:(parse_patch req) outchan
    | "/getdtd" ->
        (* NOTE(review): the client supplied "uri" is appended to the DTD
         * directory without any check visible here; confirm that path
         * traversal is prevented downstream. *)
        Http_getter_cache.respond_dtd ~patch:(parse_patch req)
          ~url:(sprintf "%s/%s"
            (Helm_registry.get "getter.dtd_dir") (req#param "uri"))
          outchan
    | "/resolve" -> return_resolve (req#param "uri") outchan
    | "/register" ->
        Http_getter.register ~uri:(req#param "uri") ~url:(req#param "url");
        return_html_msg "Register done" outchan
    | "/clean_cache" ->
        Http_getter.clean_cache ();
        return_html_msg "Done." outchan
    | "/update" ->
        Http_getter_env.reload (); (* reload servers list from servers file *)
        send_log_to (fun logger -> Http_getter.update ~logger ()) outchan
    | "/list_servers" -> return_list_servers outchan
    | "/add_server" ->
        let name = req#param "url" in
        let position = parse_position req in
        (* NOTE(review): the doubled newline presumably replaces a lost line
         * break tag; a stray closing parenthesis was removed from the
         * message. *)
        let prepend =
          sprintf "Added server %s in position %d\n\n" name position
        in
        send_log_to ~prepend
          (fun logger -> Http_getter.add_server ~logger ~position name)
          outchan
    | "/remove_server" ->
        let position = parse_position req in
        if not (Http_getter.has_server position) then
          raise (Bad_request (sprintf "no server with position %d" position))
        else
          (* NOTE(review): doubled newline, see /add_server *)
          let prepend =
            sprintf "Removed server at position %d\n\n" position
          in
          send_log_to ~prepend
            (fun logger -> Http_getter.remove_server ~logger position)
            outchan
    | "/getalluris" ->
        return_all_xml_uris (parse_output_format "getalluris" req) outchan
    | "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan
    | "/ls" ->
        return_ls (req#param "baseuri") (parse_output_format "ls" req) outchan
    | "/getempty" ->
        Http_daemon.respond ~body:Http_getter_const.empty_xml outchan
    | _ ->
        (* unknown path *)
        Http_daemon.respond_error
          ~code:(`Status (`Client_error `Bad_request)) outchan);
    Http_getter_logger.log "Done!\n"
  with
  | Http_types.Param_not_found attr_name ->
      let msg = sprintf "Parameter '%s' is missing" attr_name in
      log_failure msg;
      return_400 msg outchan
  | Bad_request msg ->
      log_failure msg;
      return_html_error msg outchan
  | Internal_error msg ->
      log_failure msg;
      return_html_internal_error msg outchan
  | Shell.Subprocess_error l ->
      let msgs =
        List.map
          (fun (cmd, code) ->
            sprintf "Command '%s' returned %s" cmd (string_of_proc_status code))
          l
      in
      log_failure (String.concat ", " msgs);
      (* NOTE(review): doubled newline separator, presumably a lost line break
       * tag followed by a newline; runtime content left unchanged. *)
      return_html_internal_error (String.concat "\n\n" msgs) outchan
  | exc ->
      let msg = "Uncaught exception: " ^ (Printexc.to_string exc) in
      log_failure msg;
      return_html_error msg outchan

  (* Main *)

(** Load the configuration, set up logging, print the environment, then
 * either perform a batch update (first command line argument is -update) or
 * start the HTTP daemon. *)
let main () =
  Helm_registry.load_from configuration_file;
  Http_getter_logger.set_log_level
    (Helm_registry.get_opt_default Helm_registry.get_int 1 "getter.log_level");
  Http_getter_logger.set_log_file
    (Helm_registry.get_opt Helm_registry.get_string "getter.log_file");
  Http_getter_env.reload ();
  print_string (Http_getter_env.env_to_string ());
  flush stdout;
  (* explicit length check instead of catching Invalid_argument *)
  let batch_update = Array.length Sys.argv > 1 && Sys.argv.(1) = "-update" in
  if batch_update then  (* batch mode: performs update and exit *)
    Http_getter.update ~logger:Http_getter.stdout_logger ()
  else begin            (* daemon mode: start http daemon *)
    at_exit Http_getter.close_maps;
    Sys.catch_break true;
    try
      Http_daemon.start'
        ~mode:`Thread ~timeout:(Some 600)
        ~port:(Helm_registry.get_int "getter.port")
        callback
    with Sys.Break -> ()  (* close_maps is already registered with at_exit *)
  end

let () = main ()