From 0dc01969c5bdf943e0b84e134b320d1fd39f5e8f Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Thu, 26 Dec 2002 15:45:11 +0000 Subject: [PATCH] - removed some debugging prints - removed default 'true' value from patch_dtd parsing function - added support for RDF and XSL uris to 'register' method - bugfix: set HTTP return code when using Http_daemon.send_basic_headers - optimization: precompiled almost all regexp used in 'ls' method - bugfix: search for 'objects' also in 'directory hit' ('ls' method) - bugfix: never raise exception when no flag is found ('ls' method) - bugfix: many typos in XML output for various methods - bugfix: s%cic://%cic:% - added support for patch_dtd also in 'getdtd' and 'getxslt' methods - bugfix: clear map only once for /update run, not for each server --- helm/http_getter/http_getter.ml | 237 ++++++++++++++++++-------------- 1 file changed, 132 insertions(+), 105 deletions(-) diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml index bb9c78e7e..c46a79326 100644 --- a/helm/http_getter/http_getter.ml +++ b/helm/http_getter/http_getter.ml @@ -37,16 +37,18 @@ let http_debug = false;; Http_common.debug := http_debug;; let http_get url = - debug_print ("Downloading URL: " ^ url); +(* debug_print ("Downloading URL: " ^ url); *) try Some (Http_client.Convenience.http_get url) - with Http_client.Http_error (code, _) -> + with Http_client.Http_error (code, _) -> None +(* (debug_print (sprintf "Failed to download %s, HTTP response was %d" url code); None) +*) ;; -let parse_format (req: Http_types.request) = +let parse_enc (req: Http_types.request) = try (match req#param "format" with | "normal" -> Enc_normal @@ -55,14 +57,12 @@ let parse_format (req: Http_types.request) = with Http_types.Param_not_found _ -> Enc_normal ;; let parse_patch_dtd (req: Http_types.request) = - try - (match req#param "patch_dtd" with - | s when String.lowercase s = "yes" -> true - | s when String.lowercase s = "no" -> false - | s -> raise (Http_getter_bad_request ("Invalid patch_dtd value: " ^ s))) - with Http_types.Param_not_found _ -> true + match req#param "patch_dtd" with + | s when String.lowercase s = "yes" -> true + | s when String.lowercase s = "no" -> false + | s -> raise (Http_getter_bad_request ("Invalid patch_dtd value: " ^ s)) ;; -let parse_ls_format (req: Http_types.request) = +let parse_output_format (req: Http_types.request) = match req#param "format" with | s when String.lowercase s = "txt" -> Fmt_text | s when String.lowercase s = "xml" -> Fmt_xml @@ -83,25 +83,23 @@ let xml_map = new Http_getter_map.map Http_getter_env.xml_dbm in let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm in let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm in -let resolve uri = (* use global maps *) - let map = - match uri with - | uri when is_xml_uri uri -> xml_map - | uri when is_rdf_uri uri -> rdf_map - | uri when is_xsl_uri uri -> xsl_map - | uri -> raise (Http_getter_unresolvable_URI uri) - in - map#resolve uri +let map_of_uri = function + | uri when is_xml_uri uri -> xml_map + | uri when is_rdf_uri uri -> rdf_map + | uri when is_xsl_uri uri -> xsl_map + | uri -> raise (Http_getter_unresolvable_URI uri) in +let resolve uri = (map_of_uri uri)#resolve uri in +let register uri = (map_of_uri uri )#add uri in let return_all_foo_uris map doctype filter outchan = - Http_daemon.send_basic_headers outchan; + Http_daemon.send_basic_headers ~code:200 outchan; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_CRLF outchan; output_string outchan (sprintf " -; + <%s> " @@ -115,7 +113,7 @@ let return_all_foo_uris map doctype filter outchan = output_string outchan (sprintf "\t\n" uri)); output_string outchan (sprintf "\n" doctype) in -let return_all_uris = return_all_foo_uris xml_map "alluris" in +let return_all_xml_uris = return_all_foo_uris xml_map "alluris" in let return_all_rdf_uris = return_all_foo_uris rdf_map "allrdfuris" in let return_ls = let (++) (oldann, oldtypes, oldbody) (newann, newtypes, newbody) = @@ -123,93 +121,115 @@ let return_ls = (if newtypes > oldtypes then newtypes else oldtypes), (if newbody > oldbody then newbody else oldbody)) in - let basepartRE = + let basepart_RE = Pcre.regexp "^([^.]*\\.[^.]*)((\\.body)|(\\.types))?(\\.ann)?" in - fun lsuri format outchan -> - let pat = - "^" ^ (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) + let (types_RE, types_ann_RE, body_RE, body_ann_RE) = + (Pcre.regexp "\\.types", Pcre.regexp "\\.types.ann", + Pcre.regexp "\\.body", Pcre.regexp "\\.body.ann") in - let dirs = ref [] in - let objs = Hashtbl.create 17 in - xml_map#iter (* BLEARGH Dbm module lacks support for fold-like functions *) - (fun _ -> function - | uri when Pcre.pmatch ~pat:(pat ^ "/") uri -> (* directory hit *) - let dir = - List.hd (Pcre.split ~pat:"/" (Pcre.replace ~pat:(pat ^ "/") uri)) - in - dirs := dir :: !dirs - | uri when Pcre.pmatch ~pat:(pat ^ "(\\.|$)") uri -> (* object hit *) - let localpart = Pcre.replace ~pat:"^.*/" uri in - let basepart = Pcre.replace ~rex:basepartRE ~templ:"$1" localpart in - let oldflags = - try - Hashtbl.find objs basepart - with Not_found -> (false, No, No) (* no ann, no types no body *) - in - let newflags = - match localpart with - | s when Pcre.pmatch ~pat:"\\.types" s -> (false, Yes, No) - | s when Pcre.pmatch ~pat:"\\.types.ann" s -> (true, Ann, No) - | s when Pcre.pmatch ~pat:"\\.body" s -> (false, No, Yes) - | s when Pcre.pmatch ~pat:"\\.body.ann" s -> (true, No, Ann) - | s -> - raise - (Http_getter_internal_error ("Invalid /ls localpart: " ^ s)) - in - Hashtbl.replace objs basepart (oldflags ++ newflags) - | _ -> () (* miss *)); - match format with - | Fmt_text -> - let body = - "dir, " ^ (String.concat "\ndir, " (List.sort compare !dirs)) ^ "\n" ^ - (Http_getter_misc.hashtbl_sorted_fold - (fun uri (annflag, typesflag, bodyflag) cont -> - sprintf - "%sobject, %s, <%s,%s,%s>\n" - cont uri (if annflag then "YES" else "NO") - (string_of_ls_flag typesflag) (string_of_ls_flag bodyflag)) - objs "") - in Http_daemon.respond ~headers:["Content-Type", "text/txt"] ~body outchan - | Fmt_xml -> - let body = - sprintf + let (slash_RE, til_slash_RE, no_slashes_RE) = + (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") + in + fun lsuri fmt outchan -> + let pat = + "^" ^ + (match lsuri with Cic p -> ("cic:" ^ p) | Theory p -> ("theory:" ^ p)) + in + let (dir_RE, obj_RE) = + (Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "(\\.|$)")) + in + let dirs = ref StringSet.empty in + let objs = Hashtbl.create 17 in + let store_dir d = + dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs + in + let store_obj o = + let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in + let oldflags = + try + Hashtbl.find objs basepart + with Not_found -> (false, No, No) (* no ann, no types no body *) + in + let newflags = + match o with + | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No) + | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No) + | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes) + | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann) + | s -> (false, No, No) + in + Hashtbl.replace objs basepart (oldflags ++ newflags) + in + xml_map#iter (* BLEARGH Dbm module lacks support for fold-like functions *) + (fun key _ -> + match key with + | uri when Pcre.pmatch ~rex:dir_RE uri -> (* directory hit *) + let localpart = Pcre.replace ~rex:dir_RE uri in + if Pcre.pmatch ~rex:no_slashes_RE localpart then + store_obj localpart + else + store_dir localpart + | uri when Pcre.pmatch ~rex:obj_RE uri -> (* file hit *) + store_obj (Pcre.replace ~rex:til_slash_RE uri) + | uri -> () (* miss *)); + match fmt with + | Fmt_text -> + let body = + (List.fold_left + (fun s d -> sprintf "%sdir, %s\n" s d) "" + (StringSet.elements !dirs)) ^ + (Http_getter_misc.hashtbl_sorted_fold + (fun uri (annflag, typesflag, bodyflag) cont -> + sprintf "%sobject, %s, <%s,%s,%s>\n" + cont uri (if annflag then "YES" else "NO") + (string_of_ls_flag typesflag) (string_of_ls_flag bodyflag)) + objs "") + in + Http_daemon.respond + ~headers:["Content-Type", "text/plain"] ~body outchan + | Fmt_xml -> + let body = + sprintf " - %s " - Http_getter_env.my_own_url - ("\n" ^ - (String.concat - "\n" - (List.map - (fun d -> "
" ^ d ^ "
") - (List.sort compare !dirs))) ^ "\n" ^ - (Http_getter_misc.hashtbl_sorted_fold - (fun uri (annflag, typesflag, bodyflag) cont -> - sprintf + Http_getter_env.my_own_url + ("\n" ^ + (String.concat + "\n" + (List.map + (fun d -> "
" ^ d ^ "
") + (StringSet.elements !dirs))) ^ "\n" ^ + (Http_getter_misc.hashtbl_sorted_fold + (fun uri (annflag, typesflag, bodyflag) cont -> + sprintf "%s -\t -\t -\t +\t +\t +\t " - cont uri (if annflag then "YES" else "NO") - (string_of_ls_flag typesflag) - (string_of_ls_flag bodyflag)) - objs "")) - in Http_daemon.respond ~headers:["Content-Type", "text/xml"] ~body outchan + cont uri (if annflag then "YES" else "NO") + (string_of_ls_flag typesflag) + (string_of_ls_flag bodyflag)) + objs "")) + in + Http_daemon.respond + ~headers:["Content-Type", "text/xml"] ~body outchan in let update_from_server logmsg server_url = (* use global maps *) + debug_print ("Updating information from " ^ server_url); let xml_url_of_uri = function (* TODO missing sanity checks on server_url, e.g. it can contains $1 *) - | uri when (Pcre.pmatch ~pat:"^cic://" uri) -> - Pcre.replace ~pat:"^cic://" ~templ:server_url uri - | uri when (Pcre.pmatch ~pat:"^theory://" uri) -> - Pcre.replace ~pat:"^theory://" ~templ:server_url uri + | uri when (Pcre.pmatch ~pat:"^cic:" uri) -> + Pcre.replace ~pat:"^cic:" ~templ:server_url uri + | uri when (Pcre.pmatch ~pat:"^theory:" uri) -> + Pcre.replace ~pat:"^theory:" ~templ:server_url uri | uri -> raise (Http_getter_invalid_URI uri) in let rdf_url_of_uri = function (* TODO as above *) @@ -239,7 +259,7 @@ let update_from_server logmsg server_url = (* use global maps *) | [uri] -> xml_map#add uri ((xml_url_of_uri uri) ^ ".xml") | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") with Http_getter_invalid_URI uri -> - log := !log ^ "Ignoring invalid XML URI: " ^ uri) + log := !log ^ "Ignoring invalid XML URI: " ^ uri ^ "
\n") (Pcre.split ~pat:"\n+" xml_index)) (* xml_index lines *) | None -> ()); (match rdf_index with @@ -253,7 +273,7 @@ let update_from_server logmsg server_url = (* use global maps *) | [uri] -> rdf_map#add uri ((rdf_url_of_uri uri) ^ ".xml") | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") with Http_getter_invalid_URI uri -> - log := !log ^ "Ignoring invalid RDF URI: " ^ uri) + log := !log ^ "Ignoring invalid RDF URI: " ^ uri ^ "
\n") (Pcre.split ~pat:"\n+" rdf_index)) (* rdf_index lines *) | None -> ()); (match xsl_index with @@ -278,16 +298,21 @@ let callback (req: Http_types.request) outchan = (let uri = req#param "uri" in (* common parameter *) match req#path with | "/getxml" -> - let enc = parse_format req in - let patch_dtd = parse_patch_dtd req in + let enc = parse_enc req in + let patch_dtd = + try parse_patch_dtd req with Http_types.Param_not_found _ -> true + in Http_getter_cache.respond_xml ~url:(resolve uri) ~uri ~enc ~patch_dtd outchan | "/getxslt" -> -(* let patch_dtd = parse_patch_dtd req in *) - (* TODO add support and default value for patch_dtd *) - Http_getter_cache.respond_xsl ~url:(resolve uri) outchan + let patch_dtd = + try parse_patch_dtd req with Http_types.Param_not_found _ -> true + in + Http_getter_cache.respond_xsl ~url:(resolve uri) ~patch_dtd outchan | "/getdtd" -> - let patch_dtd = parse_patch_dtd req in + let patch_dtd = + try parse_patch_dtd req with Http_types.Param_not_found _ -> true + in Http_getter_cache.respond_dtd ~patch_dtd ~url:(Http_getter_env.dtd_dir ^ "/" ^ uri) outchan | "/resolve" -> @@ -299,11 +324,13 @@ let callback (req: Http_types.request) outchan = return_xml_msg "\n" outchan) | "/register" -> let url = req#param "url" in - xml_map#add uri url; + register uri url; return_html_msg "Register done" outchan | _ -> assert false) | "/update" -> - (xml_map#clear; rdf_map#clear; xsl_map#clear; + (xml_map#clear; + rdf_map#clear; + xsl_map#clear; let log = List.fold_left update_from_server @@ -313,7 +340,7 @@ let callback (req: Http_types.request) outchan = in return_html_msg log outchan) | "/getalluris" -> - return_all_uris + return_all_xml_uris (fun uri -> (Pcre.pmatch ~pat:"^cic:" uri) && not (Pcre.pmatch ~pat:"\\.types$" uri)) @@ -331,7 +358,7 @@ let callback (req: Http_types.request) outchan = return_all_rdf_uris filter outchan with Http_getter_invalid_RDF_class c -> raise (Http_getter_bad_request ("Invalid RDF class: " ^ c))) - | "/ls" -> return_ls (parse_ls_uri req) (parse_ls_format req) outchan + | "/ls" -> return_ls (parse_ls_uri req) (parse_output_format req) outchan | "/getempty" -> Http_daemon.respond ~body:Http_getter_const.empty_xml outchan | invalid_request -> -- 2.39.2