X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.ml;h=ec6564249270e627d40facb4a9af47c9670bfa95;hb=2a22e73880edeea9a67e0a8e234f056a17b60549;hp=5a84455a14c2c0d5e581ac4f230c3bca53ab82cc;hpb=9606380c2f02f5ab6ab13808023469a3ae46553c;p=helm.git diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml index 5a84455a1..ec6564249 100644 --- a/helm/http_getter/http_getter.ml +++ b/helm/http_getter/http_getter.ml @@ -71,16 +71,21 @@ let parse_output_format (req: Http_types.request) = let parse_ls_uri = let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in let trailing_slash_RE = Pcre.regexp "/+$" in + let wrong_uri uri = + raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ uri)) + in fun (req: Http_types.request) -> let baseuri = req#param "baseuri" in - let subs = - Pcre.extract ~rex:parse_ls_RE - (Pcre.replace ~rex:trailing_slash_RE baseuri) - in - match (subs.(1), subs.(2)) with - | "cic", uri -> Cic uri - | "theory", uri -> Theory uri - | _ -> raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ baseuri)) + try + let subs = + Pcre.extract ~rex:parse_ls_RE + (Pcre.replace ~rex:trailing_slash_RE baseuri) + in + (match (subs.(1), subs.(2)) with + | "cic", uri -> Cic uri + | "theory", uri -> Theory uri + | _ -> wrong_uri baseuri) + with Not_found -> wrong_uri baseuri ;; (* global maps, shared by all threads *) @@ -90,8 +95,11 @@ let nuprl_map = new Http_getter_map.map Http_getter_env.nuprl_dbm in let rdf_map = new Http_getter_map.map Http_getter_env.rdf_dbm in let xsl_map = new Http_getter_map.map Http_getter_env.xsl_dbm in -let save_maps () = - cic_map#close; nuprl_map#close; rdf_map#close; xsl_map#close in +let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] in +let close_maps () = List.iter (fun m -> m#close) maps in +let clear_maps () = List.iter (fun m -> m#clear) maps in +let sync_maps () = List.iter (fun m -> m#sync) maps in + let map_of_uri = function | uri when is_cic_uri uri -> cic_map | uri when is_nuprl_uri uri -> nuprl_map @@ -105,7 +113,10 @@ let resolve uri = with Http_getter_map.Key_not_found _ -> raise (Http_getter_unresolvable_URI uri) in -let register uri = (map_of_uri uri )#add uri in +let register uri = + (* Warning: this fail if uri is already registered *) + (map_of_uri uri)#add uri +in let return_all_foo_uris map doctype filter outchan = (** return all URIs contained in 'map' which satisfy predicate 'filter'; URIs are written in an XMLish format ('doctype' is the XML doctype) onto 'outchan' @@ -135,17 +146,22 @@ in let return_all_xml_uris = return_all_foo_uris cic_map "alluris" in let return_all_rdf_uris = return_all_foo_uris rdf_map "allrdfuris" in let return_ls = - let (++) (oldann, oldtypes, oldbody) (newann, newtypes, newbody) = + let (++) (oldann, oldtypes, oldbody, oldtree) + (newann, newtypes, newbody, newtree) = ((if newann > oldann then newann else oldann), (if newtypes > oldtypes then newtypes else oldtypes), - (if newbody > oldbody then newbody else oldbody)) + (if newbody > oldbody then newbody else oldbody), + (if newtree > oldtree then newtree else oldtree)) in let basepart_RE = - Pcre.regexp "^([^.]*\\.[^.]*)((\\.body)|(\\.types))?(\\.ann)?" + Pcre.regexp + "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" in - let (types_RE, types_ann_RE, body_RE, body_ann_RE) = - (Pcre.regexp "\\.types", Pcre.regexp "\\.types.ann", - Pcre.regexp "\\.body", Pcre.regexp "\\.body.ann") + let (types_RE, types_ann_RE, body_RE, body_ann_RE, + proof_tree_RE, proof_tree_ann_RE) = + (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", + Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", + Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$") in let (slash_RE, til_slash_RE, no_slashes_RE) = (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") @@ -165,18 +181,22 @@ let return_ls = in let store_obj o = let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in + let no_flags = false, No, No, No in let oldflags = try Hashtbl.find objs basepart - with Not_found -> (false, No, No) (* no ann, no types, no body *) + with Not_found -> (* no ann, no types, no body, no proof tree *) + no_flags in let newflags = match o with - | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No) - | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No) - | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes) - | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann) - | s -> (false, No, No) + | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) + | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) + | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) + | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) + | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) + | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) + | s -> no_flags in Hashtbl.replace objs basepart (oldflags ++ newflags) in @@ -199,10 +219,12 @@ let return_ls = (fun s d -> sprintf "%sdir, %s\n" s d) "" (StringSet.elements !dirs)) ^ (Http_getter_misc.hashtbl_sorted_fold - (fun uri (annflag, typesflag, bodyflag) cont -> - sprintf "%sobject, %s, <%s,%s,%s>\n" + (fun uri (annflag, typesflag, bodyflag, treeflag) cont -> + sprintf "%sobject, %s, <%s,%s,%s,%s>\n" cont uri (if annflag then "YES" else "NO") - (string_of_ls_flag typesflag) (string_of_ls_flag bodyflag)) + (string_of_ls_flag typesflag) + (string_of_ls_flag bodyflag) + (string_of_ls_flag treeflag)) objs "") in Http_daemon.respond @@ -226,17 +248,19 @@ let return_ls = (fun d -> "
" ^ d ^ "
") (StringSet.elements !dirs))) ^ "\n" ^ (Http_getter_misc.hashtbl_sorted_fold - (fun uri (annflag, typesflag, bodyflag) cont -> + (fun uri (annflag, typesflag, bodyflag, treeflag) cont -> sprintf "%s \t \t \t +\t " cont uri (if annflag then "YES" else "NO") (string_of_ls_flag typesflag) - (string_of_ls_flag bodyflag)) + (string_of_ls_flag bodyflag) + (string_of_ls_flag treeflag)) objs "")) in Http_daemon.respond @@ -244,10 +268,11 @@ let return_ls = ~body outchan in let (index_line_sep_RE, index_sep_RE, trailing_types_RE, - heading_cic_RE, heading_theory_RE, + heading_cic_RE, heading_theory_RE, heading_nuprl_RE, heading_rdf_cic_RE, heading_rdf_theory_RE) = - (Pcre.regexp "[ \t]+", Pcre.regexp "\n+", Pcre.regexp "\\.types$", - Pcre.regexp "^cic:", Pcre.regexp "^theory:", + (Pcre.regexp "[ \t]+", Pcre.regexp "\r\n|\r|\n", + Pcre.regexp "\\.types$", + Pcre.regexp "^cic:", Pcre.regexp "^theory:", Pcre.regexp "^nuprl:", Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") in let update_from_server logmsg server_url = (* use global maps *) @@ -258,6 +283,8 @@ let update_from_server logmsg server_url = (* use global maps *) Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) -> Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) -> + Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri | uri -> raise (Http_getter_invalid_URI uri) in let rdf_url_of_uri = function (* TODO as above *) @@ -280,19 +307,24 @@ let update_from_server logmsg server_url = (* use global maps *) | Some xml_index -> (log := !log ^ "Updating XML db ...
\n"; List.iter - (fun l -> - try - (match Pcre.split ~rex:index_line_sep_RE l with - | [uri; "gz"] -> - assert (is_cic_uri uri || is_nuprl_uri uri) ; - (map_of_uri uri)#add uri ((xml_url_of_uri uri) ^ ".xml.gz") - | [uri] -> - assert (is_cic_uri uri || is_nuprl_uri uri) ; - (map_of_uri uri)#add uri ((xml_url_of_uri uri) ^ ".xml") - | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") - with Http_getter_invalid_URI uri -> - log := !log ^ "Ignoring invalid XML URI: " ^ uri ^ "
\n") - (Pcre.split ~rex:index_sep_RE xml_index)) (* xml_index lines *) + (function + | l when is_blank_line l -> () (* skip blank and commented lines *) + | l -> + try + (match Pcre.split ~rex:index_line_sep_RE l with + | [uri; "gz"] -> + assert (is_cic_uri uri || is_nuprl_uri uri) ; + (map_of_uri uri)#replace + uri ((xml_url_of_uri uri) ^ ".xml.gz") + | [uri] -> + assert (is_cic_uri uri || is_nuprl_uri uri) ; + (map_of_uri uri)#replace + uri ((xml_url_of_uri uri) ^ ".xml") + | _ -> + log := !log ^ "Ignoring invalid line: '" ^ l ^ "'
\n") + with Http_getter_invalid_URI uri -> + log := !log ^ "Ignoring invalid XML URI: '" ^ uri ^ "'
\n") + (Pcre.split ~rex:index_sep_RE xml_index)) (* xml_index lines *) | None -> ()); (match rdf_index with | Some rdf_index -> @@ -301,8 +333,9 @@ let update_from_server logmsg server_url = (* use global maps *) (fun l -> try (match Pcre.split ~rex:index_line_sep_RE l with - | [uri; "gz"] -> rdf_map#add uri ((rdf_url_of_uri uri) ^ ".xml.gz") - | [uri] -> rdf_map#add uri ((rdf_url_of_uri uri) ^ ".xml") + | [uri; "gz"] -> + rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml.gz") + | [uri] -> rdf_map#replace uri ((rdf_url_of_uri uri) ^ ".xml") | _ -> log := !log ^ "Ignoring invalid line: " ^ l ^ "
\n") with Http_getter_invalid_URI uri -> log := !log ^ "Ignoring invalid RDF URI: " ^ uri ^ "
\n") @@ -312,13 +345,25 @@ let update_from_server logmsg server_url = (* use global maps *) | Some xsl_index -> (log := !log ^ "Updating XSLT db ...
\n"; List.iter - (fun l -> xsl_map#add l (server_url ^ "/" ^ l)) + (fun l -> xsl_map#replace l (server_url ^ "/" ^ l)) (Pcre.split ~rex:index_sep_RE xsl_index); log := !log ^ "All done!
\n") | None -> ()); debug_print "done with this server"; !log in +let update_from_all_servers () = (* use global maps *) + clear_maps (); + let log = + List.fold_left + update_from_server + "" (* initial logmsg: empty *) + (* reverse order: 1st server is the most important one *) + (List.rev !Http_getter_env.servers) + in + sync_maps (); + log +in (* thread action *) @@ -327,7 +372,10 @@ let callback (req: Http_types.request) outchan = debug_print ("Connection from " ^ req#clientAddr); debug_print ("Received request: " ^ req#path); (match req#path with - | "/help" -> return_html_raw Http_getter_const.usage_string outchan + | "/help" -> + return_html_raw + (Http_getter_const.usage_string (Http_getter_env.env_to_string ())) + outchan | "/getxml" | "/getxslt" | "/getdtd" | "/resolve" | "/register" -> (let uri = req#param "uri" in (* common parameter *) match req#path with @@ -355,18 +403,78 @@ let callback (req: Http_types.request) outchan = register uri url; return_html_msg "Register done" outchan | _ -> assert false) + | "/clean_cache" -> + Http_getter_cache.clean (); + return_html_msg "Done." outchan | "/update" -> - (Http_getter_env.reload (); (* reload servers list from servers file *) - cic_map#clear; nuprl_map#clear; rdf_map#clear; xsl_map#clear; - let log = - List.fold_left - update_from_server - "" (* initial logmsg: empty *) - (* reverse order: 1st server is the most important one *) - (List.rev !Http_getter_env.servers) + Http_getter_env.reload (); (* reload servers list from servers file *) + let log = update_from_all_servers () in + return_html_msg log outchan + | "/list_servers" -> + return_html_raw + (sprintf "\n%s\n
" + (String.concat "\n" + (List.map + (let i = ref ~-1 in + fun s -> incr i; sprintf "%d%s" !i s) + !Http_getter_env.servers))) + outchan + | "/add_server" -> + let name = req#param "url" in + (try + let position = + try + let res = int_of_string (req#param "position") in + if res < 0 then + raise (Failure "int_of_string"); + res + with Failure "int_of_string" -> + raise (Http_getter_bad_request + (sprintf "position must be a non negative integer (%s given)" + (req#param "position"))) + in + if position = 0 then (* fallback to default value *) + raise (Http_types.Param_not_found "foo") + else if position > 0 then begin (* add server and update all *) + Http_getter_env.add_server ~position name; + let log = update_from_all_servers () in + return_html_msg + (sprintf "Added server %s in position %d)
\n%s" + name position log) + outchan + end else (* position < 0 *) (* error! *) + assert false (* already checked above *) + with Http_types.Param_not_found _ -> (* add as 1st server by default *) + Http_getter_env.add_server ~position:0 name; + let log = update_from_server (* quick update (new server only) *) + (sprintf "Added server %s in head position
\n" name) name + in + return_html_msg log outchan) + | "/remove_server" -> + let position = + try + let res = int_of_string (req#param "position") in + if res < 0 then + raise (Failure "int_of_string"); + res + with Failure "int_of_string" -> + raise (Http_getter_bad_request + (sprintf "position must be a non negative integer (%s given)" + (req#param "position"))) in - cic_map#sync; nuprl_map#sync; rdf_map#sync; xsl_map#sync; - return_html_msg log outchan) + let server_name = + try + List.nth !Http_getter_env.servers position + with Failure "nth" -> + raise (Http_getter_bad_request + (sprintf "no server with position %d" position)) + in + Http_getter_env.remove_server position; + let log = update_from_all_servers () in + return_html_msg + (sprintf "Removed server %s (position %d)
\n%s" + server_name position log) + outchan | "/getalluris" -> return_all_xml_uris (fun uri -> @@ -415,14 +523,15 @@ in (* daemon initialization *) let main () = - Http_getter_env.dump_env (); + print_string (Http_getter_env.env_to_string ()); + flush stdout; Unix.putenv "http_proxy" ""; - at_exit save_maps; + at_exit close_maps; Sys.catch_break true; try Http_daemon.start' ~timeout:(Some 600) ~port:Http_getter_env.port ~mode:`Thread callback - with Sys.Break -> () (* 'save_maps' already registered with 'at_exit' *) + with Sys.Break -> () (* 'close_maps' already registered with 'at_exit' *) in main ()