X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Fgetter%2Fhttp_getter.ml;h=d0ecf9ba22d241851933a343fd6406a82cc390cb;hb=acf29bdbdcdc6ad8c2d9d27e8a47500981b605cd;hp=a9884596ba62fe71ab7acfaf8f6981a49bf73c0e;hpb=11469c708704f96fde66e5bbc297f51a58173fb1;p=helm.git diff --git a/helm/ocaml/getter/http_getter.ml b/helm/ocaml/getter/http_getter.ml index a9884596b..d0ecf9ba2 100644 --- a/helm/ocaml/getter/http_getter.ml +++ b/helm/ocaml/getter/http_getter.ml @@ -67,10 +67,51 @@ let rdf_map = let xsl_map = lazy (new Http_getter_map.map (Lazy.force Http_getter_env.xsl_dbm)) +let uri_tree = ref None +let deref_if_some r = + match !r with + | None -> assert false + | Some x -> x +let is_prefetch_on () = + match !uri_tree with None -> false | Some _ -> true + +let dump_tree () = + let path = Lazy.force Http_getter_env.dump_file in + Tree.save_to_disk path (deref_if_some uri_tree); + Http_getter_md5.create_hash [ + (Lazy.force Http_getter_env.cic_dbm_real); + path ] + +let load_tree () = + if not (Http_getter_md5.check_hash ()) then + assert false + else + uri_tree := Some (Tree.load_from_disk + (Lazy.force Http_getter_env.dump_file)) + +let sync_with_map () = + if not (Http_getter_md5.check_hash ()) then begin + let tree = ref (Some Tree.empty_tree) in + Http_getter_logger.log "Updating cic map dump..."; + let t = Unix.time () in + (Lazy.force cic_map)#iter + (fun k _ -> + tree := Some (Tree.add_uri k (deref_if_some tree))); + uri_tree := !tree; + Http_getter_logger.log + (sprintf "done in %.0f sec" (Unix.time () -. t)); + dump_tree () + end else begin + Http_getter_logger.log "Cic map dump is up to date!"; + load_tree () (* XXX TASSI: race condition here *) + end + let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] let close_maps () = List.iter (fun m -> (Lazy.force m) # close) maps let clear_maps () = List.iter (fun m -> (Lazy.force m) # clear) maps -let sync_maps () = List.iter (fun m -> (Lazy.force m) # sync) maps +let sync_maps () = + List.iter (fun m -> (Lazy.force m) # sync) maps; + sync_with_map () let map_of_uri = function | uri when is_cic_uri uri -> Lazy.force cic_map @@ -222,13 +263,16 @@ let resolve_remote uri = let doc = ClientHTTP.get (sprintf "%sresolve?uri=%s" (getter_url ()) uri) in let res = ref Unknown in Pxp_ev_parser.process_entity PxpHelmConf.pxp_config (`Entry_content []) - (Pxp_ev_parser.create_entity_manager ~is_document:true PxpHelmConf.pxp_config - (Pxp_yacc.from_string doc)) + (Pxp_ev_parser.create_entity_manager ~is_document:true + PxpHelmConf.pxp_config (Pxp_yacc.from_string doc)) (function | Pxp_types.E_start_tag ("url",["value",url],_,_) -> res := Resolved url - | Pxp_types.E_start_tag ("unresolved",[],_,_) -> + | Pxp_types.E_start_tag ("unresolvable",[],_,_) -> res := Exception (Unresolvable_URI uri) - | Pxp_types.E_start_tag _ -> res := Exception UnexpectedGetterOutput + | Pxp_types.E_start_tag ("not_found",[],_,_) -> + res := Exception (Key_not_found uri) + | Pxp_types.E_start_tag (x,_,_,_) -> + res := Exception UnexpectedGetterOutput | _ -> ()); match !res with | Unknown -> raise UnexpectedGetterOutput @@ -238,8 +282,8 @@ let resolve_remote uri = let register_remote ~uri ~url = ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url) -let register_remote ~uri ~url = - ClientHTTP.send (sprintf "%sregister?uri=%s&url=%s" (getter_url ()) uri url) +let unregister_remote uri = + ClientHTTP.send (sprintf "%sunregister?uri=%s" (getter_url ()) uri) let update_remote logger () = let answer = ClientHTTP.get (getter_url () ^ "update") in @@ -247,11 +291,13 @@ let update_remote logger () = logger `BR let getxml_remote ~format ~patch_dtd uri = - ClientHTTP.get_and_save_to_tmp - (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" + let uri = + sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" (getter_url ()) uri - (match format with `Normal -> "normal" | `Gzipped -> "gzipped") - (match patch_dtd with true -> "yes" | false -> "no")) + (match format with `Normal -> "normal" | `Gzipped -> "gz") + (match patch_dtd with true -> "yes" | false -> "no") + in + ClientHTTP.get_and_save_to_tmp uri (* API *) @@ -261,16 +307,29 @@ let resolve uri = if remote () then resolve_remote uri else - try - (map_of_uri uri)#resolve uri - with Http_getter_map.Key_not_found _ -> raise (Unresolvable_URI uri) - - (* Warning: this fail if uri is already registered *) + (map_of_uri uri)#resolve uri + let register ~uri ~url = if remote () then register_remote ~uri ~url else - (map_of_uri uri)#add uri url + begin + (map_of_uri uri)#add uri url; + if is_prefetch_on () then + uri_tree := Some (Tree.add_uri uri (deref_if_some uri_tree)) + end + +let unregister uri = + if remote () then + unregister_remote uri + else + try + begin + (map_of_uri uri)#remove uri; + if is_prefetch_on () then + uri_tree := Some (Tree.remove_uri uri (deref_if_some uri_tree)) + end + with Key_not_found _ -> () let update ?(logger = fun _ -> ()) () = if remote () then @@ -278,11 +337,13 @@ let update ?(logger = fun _ -> ()) () = else update_from_all_servers logger () -let getxml ?(format = `Normal) ?(patch_dtd = true) uri = +let getxml ?(format = `Gzipped) ?(patch_dtd = false) uri = if remote () then getxml_remote ~format ~patch_dtd uri else begin + Http_getter_logger.log ~level:2 ("getxml: " ^ uri); let url = resolve uri in + Http_getter_logger.log ~level:2 ("resolved_uri: " ^ url) ; let (fname, outchan) = temp_file_of_uri uri in Http_getter_cache.respond_xml ~via_http:false ~enc:format ~patch:patch_dtd ~uri ~url outchan; @@ -294,6 +355,7 @@ let getxslt ?(patch_dtd = true) uri = if remote () then getxslt_remote ~patch_dtd uri else begin + let url = resolve uri in let (fname, outchan) = temp_file_of_uri uri in Http_getter_cache.respond_xsl ~via_http:false ~url ~patch:patch_dtd outchan; @@ -364,8 +426,8 @@ let getalluris () = getalluris_remote () else let filter uri = - (Pcre.pmatch ~rex:heading_cic_RE uri) && - not (Pcre.pmatch ~rex:trailing_types_RE uri) + (Pcre.pmatch ~rex:heading_cic_RE uri) +(* && not (Pcre.pmatch ~rex:trailing_types_RE uri) *) in return_uris (Lazy.force cic_map) filter @@ -381,145 +443,219 @@ let getallrdfuris classs = in return_uris (Lazy.force rdf_map) filter -let ls = - let (++) (oldann, oldtypes, oldbody, oldtree) - (newann, newtypes, newbody, newtree) = - ((if newann > oldann then newann else oldann), - (if newtypes > oldtypes then newtypes else oldtypes), - (if newbody > oldbody then newbody else oldbody), - (if newtree > oldtree then newtree else oldtree)) - in - let basepart_RE = - Pcre.regexp - "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" - in - let (types_RE, types_ann_RE, body_RE, body_ann_RE, - proof_tree_RE, proof_tree_ann_RE, trailing_slash_RE, theory_RE) = - (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", - Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", - Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$", - Pcre.regexp "/$", Pcre.regexp "\\.theory$") - in - let (slash_RE, til_slash_RE, no_slashes_RE) = - (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") - in - fun regexp -> - if remote () then - ls_remote regexp - else begin - let looking_for_dir = Pcre.pmatch ~rex:trailing_slash_RE regexp in - let pat = Pcre.replace ~rex:trailing_slash_RE ("^" ^ regexp) in - let (dir_RE, dir_RE2, obj_RE, orig_theory_RE) = - Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "/[^/]*"), - Pcre.regexp (pat ^ "(\\.|$)"), Pcre.regexp (pat ^ ".theory$") - in - let dirs = ref StringSet.empty in - let objs = Hashtbl.create 17 in - let store_dir d = - dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs + +let (++) (oldann, oldtypes, oldbody, oldtree) + (newann, newtypes, newbody, newtree) = + ((if newann > oldann then newann else oldann), + (if newtypes > oldtypes then newtypes else oldtypes), + (if newbody > oldbody then newbody else oldbody), + (if newtree > oldtree then newtree else oldtree)) + +let (types_RE, types_ann_RE, body_RE, body_ann_RE, + proof_tree_RE, proof_tree_ann_RE, trailing_slash_RE, theory_RE) = + (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", + Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", + Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$", + Pcre.regexp "/$", Pcre.regexp "\\.theory$") + +let basepart_RE = + Pcre.regexp + "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" + +let (slash_RE, til_slash_RE, no_slashes_RE) = + (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") +let fix_regexp_RE = Pcre.regexp ("^" ^ (Pcre.quote "(cic|theory)")) + +let ls regexp = + if remote () then + ls_remote regexp + else begin + let looking_for_dir = Pcre.pmatch ~rex:trailing_slash_RE regexp in + let pat = Pcre.replace ~rex:trailing_slash_RE ("^" ^ regexp) in + let (dir_RE, dir_local_RE, obj_RE, first_comp_RE) = + Pcre.regexp (pat ^ "/"), Pcre.regexp "[^/]+/[^/]*", + Pcre.regexp (pat ^ "(\\.|$)"), Pcre.regexp "/.*" + in + let exists_theory regexp = + let theory = + Pcre.replace ~rex:fix_regexp_RE ~templ:"theory" regexp ^ "index.theory" in - let store_obj o = - let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in - let no_flags = false, No, No, No in - let oldflags = - try - Hashtbl.find objs basepart - with Not_found -> (* no ann, no types, no body, no proof tree *) - no_flags - in - let newflags = - match o with - | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) - | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) - | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) - | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) - | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) - | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) - | s -> no_flags - in - Hashtbl.replace objs basepart (oldflags ++ newflags) + try + ignore (resolve theory); + true + with Key_not_found _ -> false + in + let toplevel_theory = + match List.rev (Pcre.split ~rex:slash_RE pat) with + | dir :: _ -> Some (dir ^ ".theory") + | _ -> None + in + let dirs = ref StringSet.empty in + let objs = Hashtbl.create 17 in + let store_dir d = + dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs + in + let store_obj o = + let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in + let no_flags = false, No, No, No in + let oldflags = + try + Hashtbl.find objs basepart + with Not_found -> (* no ann, no types, no body, no proof tree *) + no_flags in - (* Variables used in backward compatibility code to map - theory:/path/t.theory into theory:/path/t/index.theory - when cic:/path/t/ exists *) - let the_candidate_for_remapping = - (* CSC: Here I am making a strong assumption: the pattern - can be only of the form [^:]*:/path where path is - NOT a regular expression *) - "theory:" ^ Pcre.replace ~rex:(Pcre.regexp "[^:]*:") pat + let newflags = + match o with + | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) + | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) + | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) + | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) + | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) + | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) + | s -> no_flags in - let index_not_generated_yet = ref true in - let valid_candidates = ref [] in - let candidates_found = ref [] in - (Lazy.force cic_map) # iter - (* BLEARGH Dbm module lacks support for fold-like functions *) - (fun key _ -> - match key with - | uri when looking_for_dir && Pcre.pmatch ~rex:dir_RE uri -> - (* directory hit *) - let localpart = Pcre.replace ~rex:dir_RE uri in - if Pcre.pmatch ~rex:no_slashes_RE localpart then - begin - (* Backward compatibility code to map - theory:/path/t.theory into theory:/path/t/index.theory - when cic:/path/t/ exists *) - if Pcre.pmatch ~rex:theory_RE localpart then - candidates_found := (uri,localpart) :: !candidates_found - else - store_obj localpart - - end + Hashtbl.replace objs basepart (oldflags ++ newflags) + in + (* Variables used in backward compatibility code to map + theory:/path/t.theory into theory:/path/t/index.theory + when cic:/path/t/ exists *) + let the_candidate_for_remapping = + (* CSC: Here I am making a strong assumption: the pattern + can be only of the form [^:]*:/path where path is + NOT a regular expression *) + "theory:" ^ Pcre.replace ~rex:(Pcre.regexp "[^:]*:") pat + in + let index_not_generated_yet = ref true in + let valid_candidates = ref [] in + let candidates_found = ref [] in + + (*(Lazy.force cic_map) # iter*) + + (* depending on prefetch *) + let if_prefetch if_is if_not = + if is_prefetch_on() then if_is else if_not + in + + let iter_on_right_thing = if_prefetch + (fun f -> List.iter (fun k -> f k "") + (Tree.ls_path regexp (deref_if_some uri_tree))) + (fun f -> (Lazy.force cic_map) # iter f) + in + let calculate_localpart = if_prefetch + (fun uri -> uri) + (fun uri -> Pcre.replace ~rex:dir_RE uri) + in + let check_if_x_RE = if_prefetch + (fun x_RE uri -> true) + (fun x_RE uri -> Pcre.pmatch ~rex:x_RE uri) + in + let check_if_dir_RE = check_if_x_RE dir_RE in + let check_if_obj_RE = check_if_x_RE obj_RE in + + iter_on_right_thing + (fun key _ -> + (* we work in two ways: + * 1 iteration on the whole map + * 2 tree visit + * + * Since in the first case 'key' is a complete uri, while + * in the second case it is only the subtree rooted in the + * query regex, we must relay only on the localpath. + * + * example: + * query::= cic:/aaa/bbb/ + * + * answer1 ::= the whole map + * + * aswer2 ::= [ "ccc/"; "c1.something"] where + * cic:/aaa/bbb/ccc/ and cic:/aaa/bbb/c1.something + * are the (partials) uri that matched query + * + * after the localpath extracion we have more uris in the first case, + * but at least the are all rooted in the same node. + * + * the Tree.get_frontier may be changed to return the same stuff as + * the map iteration+localpath extraction does, but I hope it is not + * necessary + *) + match key with + | uri when looking_for_dir && check_if_dir_RE uri -> + (* directory hit *) + let localpart = calculate_localpart uri in + if Pcre.pmatch ~rex:no_slashes_RE localpart then + begin + (* Backward compatibility code to map + theory:/path/t.theory into theory:/path/t/index.theory + when cic:/path/t/ exists *) + if Pcre.pmatch ~rex:theory_RE localpart then + candidates_found := localpart :: !candidates_found else + store_obj localpart + end + else + begin + store_dir localpart ; + if Pcre.pmatch localpart ~rex:dir_local_RE then begin - store_dir localpart ; - if String.sub uri 0 3 = "cic" then - let dir_found = ref "" in - let _ = - Pcre.substitute_first ~rex:dir_RE2 - ~subst:(fun s -> dir_found := s; "") uri in - let dir = - "theory" ^ String.sub !dir_found 3 - (String.length !dir_found - 3) ^ ".theory" in -(* -prerr_endline ("### " ^ uri ^ " ==> " ^ !dir_found ^ " ==> " ^ dir); -*) - if not (List.mem dir !valid_candidates) then - valid_candidates := dir::!valid_candidates + let valid = + Pcre.replace ~rex:first_comp_RE localpart ^ ".theory" + in + if not (List.mem valid !valid_candidates) then + valid_candidates := valid::!valid_candidates end - | uri when (not looking_for_dir) && Pcre.pmatch ~rex:obj_RE uri -> - (* file hit *) - store_obj (Pcre.replace ~rex:til_slash_RE uri) - | uri -> (* miss *) - if !index_not_generated_yet && - Pcre.pmatch ~rex:orig_theory_RE uri - then - (index_not_generated_yet := false ; - store_obj "index.theory")); + end + | uri when (not looking_for_dir) && check_if_obj_RE uri -> + (* file hit *) + store_obj (Pcre.replace ~rex:til_slash_RE uri) + | uri -> ()); (* -prerr_endline ("@@@ " ^ String.concat " " !valid_candidates); -prerr_endline ("!!! " ^ String.concat " " (List.map fst !candidates_found)); -*) - List.iter - (fun (uri,localpart) -> - if not (List.mem uri !valid_candidates) then - store_obj localpart - ) !candidates_found ; - let ls_items = ref [] in - StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; - Http_getter_misc.hashtbl_sorted_iter - (fun uri (annflag, typesflag, bodyflag, treeflag) -> - ls_items := - Ls_object { - uri = uri; ann = annflag; - types = typesflag; body = bodyflag; proof_tree = treeflag - } :: !ls_items) - objs; - List.rev !ls_items - end + (* miss *) + if !index_not_generated_yet && + Pcre.pmatch ~rex:orig_theory_RE uri + then + (index_not_generated_yet := false ; + store_obj "index.theory")); + *) + if exists_theory regexp then store_obj "index.theory"; + List.iter + (fun localpart -> + if not (List.mem localpart !valid_candidates) then + store_obj localpart + ) !candidates_found ; + let ls_items = ref [] in + StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; + Http_getter_misc.hashtbl_sorted_iter + (fun uri (annflag, typesflag, bodyflag, treeflag) -> + ls_items := + Ls_object { + uri = uri; ann = annflag; + types = typesflag; body = bodyflag; proof_tree = treeflag + } :: !ls_items) + objs; + List.rev !ls_items + end - (* Shorthands from now on *) +(* Shorthands from now on *) let getxml' uri = getxml (UriManager.string_of_uri uri) let resolve' uri = resolve (UriManager.string_of_uri uri) let register' uri url = register ~uri:(UriManager.string_of_uri uri) ~url +let unregister' uri = unregister (UriManager.string_of_uri uri) + +let sync_dump_file () = + if is_prefetch_on () then + dump_tree () + +let init () = + Http_getter_logger.set_log_level + (Helm_registry.get_opt_default Helm_registry.get_int 1 "getter.log_level"); + Http_getter_logger.set_log_file + (Helm_registry.get_opt Helm_registry.get_string "getter.log_file"); + Http_getter_env.reload (); + let is_prefetch_set = + Helm_registry.get_opt_default Helm_registry.get_bool false "getter.prefetch" + in + if is_prefetch_set then + (* ignore (Thread.create sync_with_map ()) *) + sync_with_map ()