X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.ml;h=b5cfa47d9564561c09783dfb85b702ae280f7605;hb=85ccebb566c36671ca753debe09e6dd5c9dd0df7;hp=09a49a308ad50d7dcf80342ad9d29ff63f9987fe;hpb=5d7d6bd5090f3f82279bef0b93b4b361a5b1d751;p=helm.git diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml index 09a49a308..b5cfa47d9 100644 --- a/helm/http_getter/http_getter.ml +++ b/helm/http_getter/http_getter.ml @@ -42,6 +42,7 @@ let common_headers = [ (* HTTP queries argument parsing *) + (* parse encoding ("format" parameter), default is Enc_normal *) let parse_enc (req: Http_types.request) = try (match req#param "format" with @@ -50,18 +51,23 @@ let parse_enc (req: Http_types.request) = | s -> raise (Http_getter_bad_request ("Invalid format: " ^ s))) with Http_types.Param_not_found _ -> Enc_normal ;; + (* parse "patch_dtd" parameter, default is true *) let parse_patch (req: Http_types.request) = - match req#param "patch_dtd" with - | s when String.lowercase s = "yes" -> true - | s when String.lowercase s = "no" -> false - | s -> raise (Http_getter_bad_request ("Invalid patch_dtd value: " ^ s)) + try + (match req#param "patch_dtd" with + | s when String.lowercase s = "yes" -> true + | s when String.lowercase s = "no" -> false + | s -> raise (Http_getter_bad_request ("Invalid patch_dtd value: " ^ s))) + with Http_types.Param_not_found _ -> true ;; + (* parse output format ("format" parameter), no default value *) let parse_output_format (req: Http_types.request) = match req#param "format" with | s when String.lowercase s = "txt" -> Fmt_text | s when String.lowercase s = "xml" -> Fmt_xml | s -> raise (Http_getter_bad_request ("Invalid /ls format: " ^ s)) ;; + (* parse "baseuri" format for /ls method, no default value *) let parse_ls_uri = let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in let trailing_slash_RE = Pcre.regexp "/+$" in @@ -79,13 +85,16 @@ let parse_ls_uri = (* global maps, shared by all threads *) -let xml_map = new Http_getter_map.map Http_getter_env.xml_dbm in +let cic_map = new Http_getter_map.map Http_getter_env.cic_dbm in +let nuprl_map = new Http_getter_map.map Http_getter_env.nuprl_dbm in let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm in let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm in -let save_maps () = xml_map#close; rdf_map#close; xsl_map#close in +let save_maps () = + cic_map#close; nuprl_map#close; rdf_map#close; xsl_map#close in let map_of_uri = function - | uri when is_xml_uri uri -> xml_map + | uri when is_cic_uri uri -> cic_map + | uri when is_nuprl_uri uri -> nuprl_map | uri when is_rdf_uri uri -> rdf_map | uri when is_xsl_uri uri -> xsl_map | uri -> raise (Http_getter_unresolvable_URI uri) @@ -123,14 +132,14 @@ let return_all_foo_uris map doctype filter outchan = output_string outchan (sprintf "\t\n" uri)); output_string outchan (sprintf "\n" doctype) in -let return_all_xml_uris = return_all_foo_uris xml_map "alluris" in +let return_all_xml_uris = return_all_foo_uris cic_map "alluris" in let return_all_rdf_uris = return_all_foo_uris rdf_map "allrdfuris" in let return_ls = let (++) (oldann, oldtypes, oldbody) (newann, newtypes, newbody) = ((if newann > oldann then newann else oldann), (if newtypes > oldtypes then newtypes else oldtypes), - (if newbody > oldbody then newbody else oldbody)) - in + (if newbody > oldbody then newbody else oldbody)) + in let basepart_RE = Pcre.regexp "^([^.]*\\.[^.]*)((\\.body)|(\\.types))?(\\.ann)?" in @@ -159,7 +168,7 @@ let return_ls = let oldflags = try Hashtbl.find objs basepart - with Not_found -> (false, No, No) (* no ann, no types no body *) + with Not_found -> (false, No, No) (* no ann, no types, no body *) in let newflags = match o with @@ -171,7 +180,7 @@ let return_ls = in Hashtbl.replace objs basepart (oldflags ++ newflags) in - xml_map#iter (* BLEARGH Dbm module lacks support for fold-like functions *) + cic_map#iter (* BLEARGH Dbm module lacks support for fold-like functions *) (fun key _ -> match key with | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *) @@ -198,7 +207,7 @@ let return_ls = in Http_daemon.respond ~headers:(("Content-Type", "text/plain") :: common_headers) - ~body outchan + ~body outchan | Fmt_xml -> let body = sprintf @@ -232,13 +241,14 @@ let return_ls = in Http_daemon.respond ~headers:(("Content-Type", "text/xml") :: common_headers) - ~body outchan + ~body outchan in let (index_line_sep_RE, index_sep_RE, trailing_types_RE, - heading_cic_RE, heading_theory_RE, + heading_cic_RE, heading_theory_RE, heading_nuprl_RE, heading_rdf_cic_RE, heading_rdf_theory_RE) = - (Pcre.regexp "[ \t]+", Pcre.regexp "\n+", Pcre.regexp "\\.types$", - Pcre.regexp "^cic:", Pcre.regexp "^theory:", + (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n", + Pcre.regexp "\\.types$", + Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:", Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") in let update_from_server logmsg server_url = (* use global maps *) @@ -249,6 +259,8 @@ let update_from_server logmsg server_url = (* use global maps *) Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) -> Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) -> + Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri | uri -> raise (Http_getter_invalid_URI uri) in let rdf_url_of_uri = function (* TODO as above *) @@ -271,15 +283,22 @@ let update_from_server logmsg server_url = (* use global maps *) | Some xml_index -> (log := !log ^ "Updating XML db ...
\n"; List.iter - (fun l -> - try - (match Pcre.split ~rex:index_line_sep_RE l with - | [uri; "gz"] -> xml_map#add uri ((xml_url_of_uri uri) ^ ".xml.gz") - | [uri] -> xml_map#add uri ((xml_url_of_uri uri) ^ ".xml") - | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") - with Http_getter_invalid_URI uri -> - log := !log ^ "Ignoring invalid XML URI: " ^ uri ^ "
\n") - (Pcre.split ~rex:index_sep_RE xml_index)) (* xml_index lines *) + (function + | l when is_blank_line l -> () (* skip blank and commented lines *) + | l -> + try + (match Pcre.split ~rex:index_line_sep_RE l with + | [uri; "gz"] -> + assert (is_cic_uri uri || is_nuprl_uri uri) ; + (map_of_uri uri)#add uri ((xml_url_of_uri uri) ^ ".xml.gz") + | [uri] -> + assert (is_cic_uri uri || is_nuprl_uri uri) ; + (map_of_uri uri)#add uri ((xml_url_of_uri uri) ^ ".xml") + | _ -> + log := !log ^ "Ignoring invalid line: '" ^ l ^ "'
\n") + with Http_getter_invalid_URI uri -> + log := !log ^ "Ignoring invalid XML URI: '" ^ uri ^ "'
\n") + (Pcre.split ~rex:index_sep_RE xml_index)) (* xml_index lines *) | None -> ()); (match rdf_index with | Some rdf_index -> @@ -314,35 +333,29 @@ let callback (req: Http_types.request) outchan = debug_print ("Connection from " ^ req#clientAddr); debug_print ("Received request: " ^ req#path); (match req#path with - | "/help" -> return_html_msg Http_getter_const.usage_string outchan + | "/help" -> return_html_raw Http_getter_const.usage_string outchan | "/getxml" | "/getxslt" | "/getdtd" | "/resolve" | "/register" -> (let uri = req#param "uri" in (* common parameter *) match req#path with | "/getxml" -> let enc = parse_enc req in - let patch = - try parse_patch req with Http_types.Param_not_found _ -> true - in + let patch = parse_patch req in Http_getter_cache.respond_xml ~url:(resolve uri) ~uri ~enc ~patch outchan | "/getxslt" -> - let patch = - try parse_patch req with Http_types.Param_not_found _ -> true - in + let patch = parse_patch req in Http_getter_cache.respond_xsl ~url:(resolve uri) ~patch outchan | "/getdtd" -> - let patch = - try parse_patch req with Http_types.Param_not_found _ -> true - in + let patch = parse_patch req in Http_getter_cache.respond_dtd ~patch ~url:(Http_getter_env.dtd_dir ^ "/" ^ uri) outchan | "/resolve" -> (try - return_xml_msg + return_xml_raw (sprintf "\n" (resolve uri)) outchan with Http_getter_unresolvable_URI uri -> - return_xml_msg "\n" outchan) + return_xml_raw "\n" outchan) | "/register" -> let url = req#param "url" in register uri url; @@ -350,7 +363,7 @@ let callback (req: Http_types.request) outchan = | _ -> assert false) | "/update" -> (Http_getter_env.reload (); (* reload servers list from servers file *) - xml_map#clear; rdf_map#clear; xsl_map#clear; + cic_map#clear; nuprl_map#clear; rdf_map#clear; xsl_map#clear; let log = List.fold_left update_from_server @@ -358,7 +371,7 @@ let callback (req: Http_types.request) outchan = (* reverse order: 1st server is the most important one *) (List.rev !Http_getter_env.servers) in - xml_map#sync; rdf_map#sync; xsl_map#sync; + cic_map#sync; nuprl_map#sync; rdf_map#sync; xsl_map#sync; return_html_msg log outchan) | "/getalluris" -> return_all_xml_uris