(* constants *)
-let configuration_file = "/projects/helm/etc/http_getter.conf.xml"
+let configuration_file = "http_getter.conf.xml"
let common_headers = [
"Cache-Control", "no-cache";
| "backward" -> `Backward
| c -> raise (Bad_request ("Invalid RDF class: " ^ c))
+let mk_return_fun pp_fun contype msg outchan =
+ Http_daemon.respond
+ ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan
+let pp_error s =
+ sprintf "<html><body>Http Getter error: %s</body></html>" s
+let pp_internal_error s =
+ sprintf "<html><body>Http Getter Internal error: %s</body></html>" s
+let pp_msg s = sprintf "<html><body>%s</body></html>" s
+let null_pp s = s
+let return_html_error = mk_return_fun pp_error "text/html"
+let return_html_internal_error = mk_return_fun pp_internal_error "text/html"
+let return_html_msg = mk_return_fun pp_msg "text/html"
+let return_html_raw = mk_return_fun null_pp "text/html"
+let return_xml_raw = mk_return_fun null_pp "text/xml"
+let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan
+
let return_all_foo_uris doctype uris outchan =
Http_daemon.send_basic_headers ~code:200 outchan;
Http_daemon.send_header "Content-Type" "text/xml" outchan;
(Http_getter.list_servers ()))))
outchan
+let log_failure msg = debug_print ("Request not fulfilled: " ^ msg)
+
+ (** given an action (i.e. a function which expects a logger and do something
+ * using it as a logger), perform it sending its output incrementally to the
+ * given output channel. Response is sent embedded in an HTML document.
+ * Channel is closed afterwards. *)
+let send_log_to ?prepend action outchan =
+ Http_daemon.send_basic_headers ~code:200 outchan;
+ Http_daemon.send_header "Content-Type" "text/html" outchan;
+ Http_daemon.send_CRLF outchan;
+ output_string outchan "<html><body>\n"; flush outchan;
+ (match prepend with
+ | None -> ()
+ | Some text -> output_string outchan text; flush outchan);
+ let logger tag =
+ output_string outchan (HelmLogger.html_of_html_tag tag);
+ flush outchan
+ in
+ action logger;
+ output_string outchan "\n</body></html>";
+ close_out outchan
+
(* thread action *)
let callback (req: Http_types.request) outchan =
return_html_msg "Done." outchan
| "/update" ->
Http_getter_env.reload (); (* reload servers list from servers file *)
- let log = Http_getter.update () in
- return_html_msg (HelmLogger.html_of_html_msg log) outchan
+ send_log_to (fun logger -> Http_getter.update ~logger ()) outchan
| "/list_servers" -> return_list_servers outchan
| "/add_server" ->
let name = req#param "url" in
let position = parse_position req in
- let log = Http_getter.add_server ~position name in
- return_html_msg
- (sprintf "Added server %s in position %d)<br />\n%s"
- name position (HelmLogger.html_of_html_msg log))
- outchan
+ let prepend =
+ sprintf "Added server %s in position %d)<br />\n" name position
+ in
+ send_log_to ~prepend
+ (fun logger -> Http_getter.add_server ~logger ~position name) outchan
| "/remove_server" ->
let position = parse_position req in
- let log =
- try
- Http_getter.remove_server position
- with Invalid_argument _ ->
- raise (Bad_request (sprintf "no server with position %d" position))
- in
- return_html_msg
- (sprintf "Removed server at position %d<br />\n%s"
- position (HelmLogger.html_of_html_msg log))
- outchan
+ if not (Http_getter.has_server position) then
+ raise (Bad_request (sprintf "no server with position %d" position))
+ else
+ let prepend =
+ sprintf "Removed server at position %d<br />\n" position
+ in
+ send_log_to ~prepend
+ (fun logger -> Http_getter.remove_server ~logger position) outchan
| "/getalluris" ->
return_all_xml_uris (parse_output_format "getalluris" req) outchan
| "/getallrdfuris" -> return_all_rdf_uris (parse_rdf_class req) outchan
debug_print "Done!\n"
with
| Http_types.Param_not_found attr_name ->
- return_400 (sprintf "Parameter '%s' is missing" attr_name) outchan
- | Bad_request msg -> return_html_error msg outchan
- | Internal_error msg -> return_html_internal_error msg outchan
+ let msg = sprintf "Parameter '%s' is missing" attr_name in
+ log_failure msg;
+ return_400 msg outchan
+ | Bad_request msg ->
+ log_failure msg;
+ return_html_error msg outchan
+ | Internal_error msg ->
+ log_failure msg;
+ return_html_internal_error msg outchan
| Shell.Subprocess_error l ->
- return_html_internal_error
- (String.concat "<br />\n"
- (List.map
- (fun (cmd, code) ->
- sprintf "Command '%s' returned %s"
- cmd (string_of_proc_status code))
- l))
- outchan
+ let msgs =
+ List.map
+ (fun (cmd, code) ->
+ sprintf "Command '%s' returned %s" cmd (string_of_proc_status code))
+ l
+ in
+ log_failure (String.concat ", " msgs);
+ return_html_internal_error (String.concat "<br />\n" msgs) outchan
| exc ->
- return_html_error
- ("Uncaught exception: " ^ (Printexc.to_string exc))
- outchan
+ let msg = "Uncaught exception: " ^ (Printexc.to_string exc) in
+ log_failure msg;
+ return_html_error msg outchan
(* Main *)
| Exception of exn
| Resolved of string
+type logger_callback = HelmLogger.html_tag -> unit
+
let not_implemented s = raise (Not_implemented ("Http_getter." ^ s))
let (index_line_sep_RE, index_sep_RE, trailing_types_RE,
| uri when is_xsl_uri uri -> Lazy.force xsl_map
| uri -> raise (Unresolvable_URI uri)
-let update_from_server logmsg server_url = (* use global maps *)
+let update_from_server logger server_url = (* use global maps *)
debug_print ("Updating information from " ^ server_url);
let xml_url_of_uri = function
(* TODO missing sanity checks on server_url, e.g. it can contains $1 *)
Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri
| uri -> raise (Invalid_URI uri)
in
- let log = ref (`T ("Processing server: " ^ server_url) :: logmsg) in
+ logger (`T ("Processing server: " ^ server_url));
let (xml_index, rdf_index, xsl_index) =
(* TODO keeps index in memory, is better to keep them on temp files? *)
(http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xml_index)),
debug_print (sprintf "Warning: useless server %s" server_url);
(match xml_index with
| Some xml_index ->
- (log := `T "Updating XML db ...<br />" :: !log;
+ logger (`T "Updating XML db ...<br />");
List.iter
(function
| l when is_blank_line l -> () (* skip blank and commented lines *)
assert (is_cic_uri uri || is_nuprl_uri uri) ;
(map_of_uri uri)#replace
uri ((xml_url_of_uri uri) ^ ".xml")
- | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log)
+ | _ -> logger (`T ("Ignoring invalid line: '" ^ l)))
with Invalid_URI uri ->
- log := `T ("Ignoring invalid XML URI: '" ^ l) :: !log))
+ logger (`T ("Ignoring invalid XML URI: '" ^ l))))
(Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *)
- log := `T "All done" :: !log)
+ logger (`T "All done")
| None -> ());
(match rdf_index with
| Some rdf_index ->
- (log := `T "Updating RDF db ..." :: !log;
+ logger (`T "Updating RDF db ...");
List.iter
(fun l ->
try
| [uri] ->
(Lazy.force rdf_map) # replace uri
((rdf_url_of_uri uri) ^ ".xml")
- | _ -> log := `T ("Ignoring invalid line: '" ^ l) :: !log)
+ | _ -> logger (`T ("Ignoring invalid line: '" ^ l)))
with Invalid_URI uri ->
- log := `T ("Ignoring invalid RDF URI: '" ^ l) :: !log)
+ logger (`T ("Ignoring invalid RDF URI: '" ^ l)))
(Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *)
- log := `T "All done" :: !log)
+ logger (`T "All done")
| None -> ());
(match xsl_index with
| Some xsl_index ->
- (log := `T "Updating XSLT db ..." :: !log;
+ logger (`T "Updating XSLT db ...");
List.iter
(fun l -> (Lazy.force xsl_map) # replace l (server_url ^ "/" ^ l))
(Pcre.split ~rex:index_sep_RE xsl_index);
- log := `T "All done" :: !log)
+ logger (`T "All done")
| None -> ());
- debug_print "done with this server";
- !log
+ debug_print "done with this server"
-let update_from_all_servers () = (* use global maps *)
+let update_from_all_servers logger () = (* use global maps *)
clear_maps ();
- let log =
- List.fold_left
- update_from_server
- [] (* initial logmsg: empty *)
- (* reverse order: 1st server is the most important one *)
- (List.map snd (List.rev (Http_getter_env.servers ())))
- in
- sync_maps ();
- `Msg (`L (List.rev log))
+ List.iter
+ (update_from_server logger)
+ (* reverse order: 1st server is the most important one *)
+ (List.map snd (List.rev (Http_getter_env.servers ())));
+ sync_maps ()
-let update_from_one_server server_url =
- let log = update_from_server [] server_url in
- `Msg (`L (List.rev log))
+let update_from_one_server ?(logger = fun _ -> ()) server_url =
+ update_from_server logger server_url
let temp_file_of_uri uri =
let flat_string s s' c =
let getdtd_remote ~patch_dtd uri = not_implemented "getdtd_remote"
let clean_cache_remote () = not_implemented "clean_cache_remote"
let list_servers_remote () = not_implemented "list_servers_remote"
-let add_server_remote ~position name = not_implemented "add_server_remote"
-let remove_server_remote position = not_implemented "remove_server_remote"
+let add_server_remote ~logger ~position name = not_implemented "add_server_remote"
+let remove_server_remote ~logger position = not_implemented "remove_server_remote"
let getalluris_remote () = not_implemented "getalluris_remote"
let getallrdfuris_remote () = not_implemented "getallrdfuris_remote"
let ls_remote lsuri = not_implemented "ls_remote"
let register_remote ~uri ~url =
ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url)
-let update_remote () =
+let update_remote logger () =
let answer = ClientHTTP.get (getter_url () ^ "update") in
- `Msg (`T answer)
+ logger (`T answer)
let getxml_remote ~format ~patch_dtd uri =
ClientHTTP.get_and_save_to_tmp
else
(map_of_uri uri)#add uri url
-let update () =
+let update ?(logger = fun _ -> ()) () =
if remote () then
- update_remote ()
+ update_remote logger ()
else
- update_from_all_servers ()
+ update_from_all_servers logger ()
let getxml ?(format = Enc_normal) ?(patch_dtd = true) uri =
if remote () then
else
Http_getter_env.servers ()
-let add_server ?(position = 0) name =
+let add_server ?(logger = fun _ -> ()) ?(position = 0) name =
if remote () then
- add_server_remote ~position name
+ add_server_remote ~logger ~position name
else begin
if position = 0 then begin
Http_getter_env.add_server ~position:0 name;
- update_from_one_server name (* quick update (new server only) *)
+ update_from_one_server ~logger name (* quick update (new server only) *)
end else if position > 0 then begin
Http_getter_env.add_server ~position name;
- update ()
- end else (* already checked bt parse_position *)
+ update ~logger ()
+ end else (* already checked by parse_position *)
assert false
end
-let remove_server position =
+let has_server position = List.mem_assoc position (Http_getter_env.servers ())
+
+let remove_server ?(logger = fun _ -> ()) position =
if remote () then
- remove_server_remote ()
+ remove_server_remote ~logger ()
else begin
let server_name =
try
raise (Invalid_argument (sprintf "no server with position %d" position))
in
Http_getter_env.remove_server position;
- update ()
+ update ~logger ()
end
let return_uris map filter =
open Http_getter_types
+type logger_callback = HelmLogger.html_tag -> unit
+
(** {2 Getter Web Service interface as API *)
val help: unit -> string
val resolve: string -> string (* uri -> url *)
val register: uri:string -> url:string -> unit
-val update: unit -> HelmLogger.html_msg
+val update: ?logger:logger_callback -> unit -> unit
val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string
val getxslt : ?patch_dtd:bool -> string -> string
val getdtd : ?patch_dtd:bool -> string -> string
val clean_cache: unit -> unit
val list_servers: unit -> (int * string) list
-val add_server: ?position:int -> string -> HelmLogger.html_msg
-val remove_server: int -> HelmLogger.html_msg
+val add_server: ?logger:logger_callback -> ?position:int -> string -> unit
+val remove_server: ?logger:logger_callback -> int -> unit
val getalluris: unit -> string list
val getallrdfuris: [ `Forward | `Backward ] -> string list
val ls: xml_uri -> ls_item list
(** {2 Misc} *)
val close_maps: unit -> unit
-val update_from_one_server: string -> HelmLogger.html_msg
+val update_from_one_server: ?logger:logger_callback -> string -> unit
+val has_server: int -> bool (* does a server with a given position exists? *)
(* TODO check this: old getter here used text/xml *)
return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan
else
- return_html_error ("Can't find DTD: " ^ url) outchan
+ raise (Dtd_not_found url)
let clean () =
let module E = Http_getter_env in
(Lazy.force Http_getter_env.my_own_url))
line
-let pp_error s =
- sprintf "<html><body>Http Getter error: %s</body></html>" s
-let pp_internal_error s =
- sprintf "<html><body>Http Getter Internal error: %s</body></html>" s
-let pp_msg s = sprintf "<html><body>%s</body></html>" s
-let null_pp s = s
-
-let mk_return_fun pp_fun contype msg outchan =
- Http_daemon.respond
- ~body:(pp_fun msg) ~headers:["Content-Type", contype] outchan
-
-let return_html_error = mk_return_fun pp_error "text/html"
-let return_html_internal_error = mk_return_fun pp_internal_error "text/html"
-let return_html_msg = mk_return_fun pp_msg "text/html"
-let return_html_raw = mk_return_fun null_pp "text/html"
-let return_xml_raw = mk_return_fun null_pp "text/xml"
let return_file
~fname ?contype ?contenc ?(patch_fun = fun x -> x) ?(gunzip = false) outchan
=
(fun line -> output_string outchan (patch_fun line ^ "\n"))
fname
;;
-let return_400 body outchan = Http_daemon.respond_error ~code:400 ~body outchan
val patch_xsl : string -> string
val patch_dtd : string -> string
-val return_html_error: string -> out_channel -> unit
-val return_html_internal_error: string -> out_channel -> unit
- (** return an HTML HTTP response from the given string, embedding it in an
- "H1" element of an HTML page; content-type is set to text/html *)
-val return_html_msg: string -> out_channel -> unit
- (** return an HTTP response using given string as content; content-type is set
- to text/html *)
-val return_html_raw: string -> out_channel -> unit
- (** return an HTTP response using given string as content; content-type is set
- to text/xml *)
-val return_xml_raw: string -> out_channel -> unit
- (** return a bad request http response *)
-val return_400: string -> out_channel -> unit
(**
@param fname name of the file to be sent
@param contype Content-Type header value
* http://helm.cs.unibo.it/
*)
+(** {2 Debugger and logger} *)
+
(** enable/disable debugging messages *)
val debug: bool ref
exception Invalid_RDF_class of string
exception Internal_error of string
exception Cache_failure of string
+exception Dtd_not_found of string (* dtd's url *)
type encoding = Enc_normal | Enc_gzipped
type answer_format = Fmt_text | Fmt_xml