(*
- * Copyright (C) 2003, HELM Team.
+ * Copyright (C) 2003:
+ * Stefano Zacchiroli <zack@cs.unibo.it>
+ * for the HELM Team http://helm.cs.unibo.it/
*
* This file is part of HELM, an Hypertextual, Electronic
* Library of Mathematics, developed at the Computer Science
* MA 02111-1307, USA.
*
* For details, see the HELM World-Wide-Web page,
- * http://cs.unibo.it/helm/.
+ * http://helm.cs.unibo.it/
*)
-(* TODO optimization: precompile regexps *)
-
open Http_getter_common;;
open Http_getter_misc;;
open Http_getter_types;;
open Http_getter_debugger;;
open Printf;;
+ (* constants *)
+
+let common_headers = [
+ "Cache-Control", "no-cache";
+ "Pragma", "no-cache";
+ "Expires", "0"
+]
+
(* HTTP queries argument parsing *)
let parse_enc (req: Http_types.request) =
| s -> raise (Http_getter_bad_request ("Invalid format: " ^ s)))
with Http_types.Param_not_found _ -> Enc_normal
;;
-let parse_patch_dtd (req: Http_types.request) =
+let parse_patch (req: Http_types.request) =
match req#param "patch_dtd" with
| s when String.lowercase s = "yes" -> true
| s when String.lowercase s = "no" -> false
| s when String.lowercase s = "xml" -> Fmt_xml
| s -> raise (Http_getter_bad_request ("Invalid /ls format: " ^ s))
;;
-let parse_ls_uri (req: Http_types.request) =
- let baseuri = req#param "baseuri" in
- let subs =
- Pcre.extract ~pat:"^(\\w+):(.*)$" (Pcre.replace ~pat:"/+$" baseuri)
- in
- match (subs.(1), subs.(2)) with
- | "cic", uri -> Cic uri
- | "theory", uri -> Theory uri
- | _ -> raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ baseuri))
+let parse_ls_uri =
+ let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in
+ let trailing_slash_RE = Pcre.regexp "/+$" in
+ fun (req: Http_types.request) ->
+ let baseuri = req#param "baseuri" in
+ let subs =
+ Pcre.extract ~rex:parse_ls_RE
+ (Pcre.replace ~rex:trailing_slash_RE baseuri)
+ in
+ match (subs.(1), subs.(2)) with
+ | "cic", uri -> Cic uri
+ | "theory", uri -> Theory uri
+ | _ -> raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ baseuri))
;;
(* global maps, shared by all threads *)
| uri when is_xsl_uri uri -> xsl_map
| uri -> raise (Http_getter_unresolvable_URI uri)
in
-let resolve uri = (map_of_uri uri)#resolve uri in
+let resolve uri =
+ try
+ (map_of_uri uri)#resolve uri
+ with Http_getter_map.Key_not_found _ ->
+ raise (Http_getter_unresolvable_URI uri)
+in
let register uri = (map_of_uri uri )#add uri in
let return_all_foo_uris map doctype filter outchan =
+ (** return all URIs contained in 'map' which satisfy predicate 'filter'; URIs
+ are written in an XMLish format ('doctype' is the XML doctype) onto 'outchan'
+ *)
Http_daemon.send_basic_headers ~code:200 outchan;
Http_daemon.send_header "Content-Type" "text/xml" outchan;
+ Http_daemon.send_headers common_headers outchan;
Http_daemon.send_CRLF outchan;
output_string
outchan
objs "")
in
Http_daemon.respond
- ~headers:["Content-Type", "text/plain"] ~body outchan
+ ~headers:(("Content-Type", "text/plain") :: common_headers)
+ ~body outchan
| Fmt_xml ->
let body =
sprintf
objs ""))
in
Http_daemon.respond
- ~headers:["Content-Type", "text/xml"] ~body outchan
+ ~headers:(("Content-Type", "text/xml") :: common_headers)
+ ~body outchan
in
-let (index_line_sep_RE, index_sep_RE) =
- (Pcre.regexp "[ \t]+", Pcre.regexp "\n+")
+let (index_line_sep_RE, index_sep_RE, trailing_types_RE,
+ heading_cic_RE, heading_theory_RE,
+ heading_rdf_cic_RE, heading_rdf_theory_RE) =
+ (Pcre.regexp "[ \t]+", Pcre.regexp "\n+", Pcre.regexp "\\.types$",
+ Pcre.regexp "^cic:", Pcre.regexp "^theory:",
+ Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:")
in
let update_from_server logmsg server_url = (* use global maps *)
debug_print ("Updating information from " ^ server_url);
let xml_url_of_uri = function
(* TODO missing sanity checks on server_url, e.g. it can contains $1 *)
- | uri when (Pcre.pmatch ~pat:"^cic:" uri) ->
- Pcre.replace ~pat:"^cic:" ~templ:server_url uri
- | uri when (Pcre.pmatch ~pat:"^theory:" uri) ->
- Pcre.replace ~pat:"^theory:" ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) ->
+ Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) ->
+ Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri
| uri -> raise (Http_getter_invalid_URI uri)
in
let rdf_url_of_uri = function (* TODO as above *)
- | uri when (Pcre.pmatch ~pat:"^helm:rdf.*//cic:" uri) ->
- Pcre.replace ~pat:"^helm:rdf.*//cic:" ~templ:server_url uri
- | uri when (Pcre.pmatch ~pat:"^helm:rdf.*//theory:" uri) ->
- Pcre.replace ~pat:"^helm:rdf.*//theory:" ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) ->
+ Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri
+ | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) ->
+ Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri
| uri -> raise (Http_getter_invalid_URI uri)
in
let log = ref (logmsg ^ "Processing server: " ^ server_url ^ "<br />\n") in
match req#path with
| "/getxml" ->
let enc = parse_enc req in
- let patch_dtd =
- try parse_patch_dtd req with Http_types.Param_not_found _ -> true
+ let patch =
+ try parse_patch req with Http_types.Param_not_found _ -> true
in
Http_getter_cache.respond_xml
- ~url:(resolve uri) ~uri ~enc ~patch_dtd outchan
+ ~url:(resolve uri) ~uri ~enc ~patch outchan
| "/getxslt" ->
- let patch_dtd =
- try parse_patch_dtd req with Http_types.Param_not_found _ -> true
+ let patch =
+ try parse_patch req with Http_types.Param_not_found _ -> true
in
- Http_getter_cache.respond_xsl ~url:(resolve uri) ~patch_dtd outchan
+ Http_getter_cache.respond_xsl ~url:(resolve uri) ~patch outchan
| "/getdtd" ->
- let patch_dtd =
- try parse_patch_dtd req with Http_types.Param_not_found _ -> true
+ let patch =
+ try parse_patch req with Http_types.Param_not_found _ -> true
in
Http_getter_cache.respond_dtd
- ~patch_dtd ~url:(Http_getter_env.dtd_dir ^ "/" ^ uri) outchan
+ ~patch ~url:(Http_getter_env.dtd_dir ^ "/" ^ uri) outchan
| "/resolve" ->
(try
return_xml_msg
return_html_msg "Register done" outchan
| _ -> assert false)
| "/update" ->
- (xml_map#clear; rdf_map#clear; xsl_map#clear;
+ (Http_getter_env.reload (); (* reload servers list from servers file *)
+ xml_map#clear; rdf_map#clear; xsl_map#clear;
let log =
List.fold_left
update_from_server
"" (* initial logmsg: empty *)
(* reverse order: 1st server is the most important one *)
- (List.rev Http_getter_env.servers)
+ (List.rev !Http_getter_env.servers)
in
xml_map#sync; rdf_map#sync; xsl_map#sync;
return_html_msg log outchan)
| "/getalluris" ->
return_all_xml_uris
(fun uri ->
- (Pcre.pmatch ~pat:"^cic:" uri) &&
- not (Pcre.pmatch ~pat:"\\.types$" uri))
+ (Pcre.pmatch ~rex:heading_cic_RE uri) &&
+ not (Pcre.pmatch ~rex:trailing_types_RE uri))
outchan
| "/getallrdfuris" ->
(let classs = req#param "class" in