let xsl_map =
lazy (new Http_getter_map.map (Lazy.force Http_getter_env.xsl_dbm))
+let uri_tree = ref None
+let deref_if_some r =
+ match !r with
+ | None -> assert false
+ | Some x -> x
+let is_prefetch_on () =
+ match !uri_tree with None -> false | Some _ -> true
+
+let dump_tree () =
+ let path = Lazy.force Http_getter_env.dump_file in
+ Tree.save_to_disk path (deref_if_some uri_tree);
+ Http_getter_md5.create_hash [
+ (Lazy.force Http_getter_env.cic_dbm_real);
+ path ]
+
+let load_tree () =
+ if not (Http_getter_md5.check_hash ()) then
+ assert false
+ else
+ uri_tree := Some (Tree.load_from_disk
+ (Lazy.force Http_getter_env.dump_file))
+
+let sync_with_map () =
+ if not (Http_getter_md5.check_hash ()) then begin
+ let tree = ref (Some Tree.empty_tree) in
+ Http_getter_logger.log "Updating cic map dump...";
+ let t = Unix.time () in
+ (Lazy.force cic_map)#iter
+ (fun k _ ->
+ tree := Some (Tree.add_uri k (deref_if_some tree)));
+ uri_tree := !tree;
+ Http_getter_logger.log
+ (sprintf "done in %.0f sec" (Unix.time () -. t));
+ dump_tree ()
+ end else begin
+ Http_getter_logger.log "Cic map dump is up to date!";
+ load_tree () (* XXX TASSI: race condition here *)
+ end
+
let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ]
let close_maps () = List.iter (fun m -> (Lazy.force m) # close) maps
let clear_maps () = List.iter (fun m -> (Lazy.force m) # clear) maps
-let sync_maps () = List.iter (fun m -> (Lazy.force m) # sync) maps
+let sync_maps () =
+ List.iter (fun m -> (Lazy.force m) # sync) maps;
+ sync_with_map ()
let map_of_uri = function
| uri when is_cic_uri uri -> Lazy.force cic_map
if remote () then
register_remote ~uri ~url
else
- (map_of_uri uri)#add uri url
+ begin
+ (map_of_uri uri)#add uri url;
+ if is_prefetch_on () then
+ uri_tree := Some (Tree.add_uri uri (deref_if_some uri_tree))
+ end
let unregister uri =
if remote () then
unregister_remote uri
else
try
- (map_of_uri uri)#remove uri
+ begin
+ (map_of_uri uri)#remove uri;
+ if is_prefetch_on () then
+ uri_tree := Some (Tree.remove_uri uri (deref_if_some uri_tree))
+ end
with Key_not_found _ -> ()
let update ?(logger = fun _ -> ()) () =
in
return_uris (Lazy.force rdf_map) filter
-let ls =
- let (++) (oldann, oldtypes, oldbody, oldtree)
- (newann, newtypes, newbody, newtree) =
- ((if newann > oldann then newann else oldann),
- (if newtypes > oldtypes then newtypes else oldtypes),
- (if newbody > oldbody then newbody else oldbody),
- (if newtree > oldtree then newtree else oldtree))
- in
- let basepart_RE =
- Pcre.regexp
- "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$"
- in
- let (types_RE, types_ann_RE, body_RE, body_ann_RE,
- proof_tree_RE, proof_tree_ann_RE, trailing_slash_RE, theory_RE) =
- (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$",
- Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$",
- Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$",
- Pcre.regexp "/$", Pcre.regexp "\\.theory$")
- in
- let (slash_RE, til_slash_RE, no_slashes_RE) =
- (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$")
- in
- fun regexp ->
- if remote () then
- ls_remote regexp
- else begin
- let looking_for_dir = Pcre.pmatch ~rex:trailing_slash_RE regexp in
- let pat = Pcre.replace ~rex:trailing_slash_RE ("^" ^ regexp) in
- let (dir_RE, dir_RE2, obj_RE, orig_theory_RE) =
- Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "/[^/]*"),
- Pcre.regexp (pat ^ "(\\.|$)"), Pcre.regexp (pat ^ ".theory$")
- in
- let dirs = ref StringSet.empty in
- let objs = Hashtbl.create 17 in
- let store_dir d =
- dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs
- in
- let store_obj o =
- let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in
- let no_flags = false, No, No, No in
- let oldflags =
- try
- Hashtbl.find objs basepart
- with Not_found -> (* no ann, no types, no body, no proof tree *)
- no_flags
- in
- let newflags =
- match o with
- | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No)
- | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No)
- | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No)
- | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No)
- | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes)
- | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann)
- | s -> no_flags
- in
- Hashtbl.replace objs basepart (oldflags ++ newflags)
+
+let (++) (oldann, oldtypes, oldbody, oldtree)
+ (newann, newtypes, newbody, newtree) =
+ ((if newann > oldann then newann else oldann),
+ (if newtypes > oldtypes then newtypes else oldtypes),
+ (if newbody > oldbody then newbody else oldbody),
+ (if newtree > oldtree then newtree else oldtree))
+
+let (types_RE, types_ann_RE, body_RE, body_ann_RE,
+ proof_tree_RE, proof_tree_ann_RE, trailing_slash_RE, theory_RE) =
+ (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$",
+ Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$",
+ Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$",
+ Pcre.regexp "/$", Pcre.regexp "\\.theory$")
+
+let basepart_RE =
+ Pcre.regexp
+ "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$"
+
+let (slash_RE, til_slash_RE, no_slashes_RE) =
+ (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$")
+
+let ls regexp =
+ if remote () then
+ ls_remote regexp
+ else begin
+ let looking_for_dir = Pcre.pmatch ~rex:trailing_slash_RE regexp in
+ let pat = Pcre.replace ~rex:trailing_slash_RE ("^" ^ regexp) in
+ let (dir_RE, dir_local_RE, obj_RE, first_comp_RE) =
+ Pcre.regexp (pat ^ "/"), Pcre.regexp "[^/]+/[^/]*",
+ Pcre.regexp (pat ^ "(\\.|$)"), Pcre.regexp "/.*"
+ in
+ let toplevel_theory =
+ match List.rev (Pcre.split ~rex:slash_RE pat) with
+ | dir :: _ -> Some (dir ^ ".theory")
+ | _ -> None
+ in
+ let dirs = ref StringSet.empty in
+ let objs = Hashtbl.create 17 in
+ let store_dir d =
+ dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs
+ in
+ let store_obj o =
+ let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in
+ let no_flags = false, No, No, No in
+ let oldflags =
+ try
+ Hashtbl.find objs basepart
+ with Not_found -> (* no ann, no types, no body, no proof tree *)
+ no_flags
in
- (* Variables used in backward compatibility code to map
- theory:/path/t.theory into theory:/path/t/index.theory
- when cic:/path/t/ exists *)
- let the_candidate_for_remapping =
- (* CSC: Here I am making a strong assumption: the pattern
- can be only of the form [^:]*:/path where path is
- NOT a regular expression *)
- "theory:" ^ Pcre.replace ~rex:(Pcre.regexp "[^:]*:") pat
+ let newflags =
+ match o with
+ | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No)
+ | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No)
+ | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No)
+ | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No)
+ | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes)
+ | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann)
+ | s -> no_flags
in
- let index_not_generated_yet = ref true in
- let valid_candidates = ref [] in
- let candidates_found = ref [] in
- (Lazy.force cic_map) # iter
- (* BLEARGH Dbm module lacks support for fold-like functions *)
- (fun key _ ->
- match key with
- | uri when looking_for_dir && Pcre.pmatch ~rex:dir_RE uri ->
- (* directory hit *)
- let localpart = Pcre.replace ~rex:dir_RE uri in
- if Pcre.pmatch ~rex:no_slashes_RE localpart then
- begin
- (* Backward compatibility code to map
- theory:/path/t.theory into theory:/path/t/index.theory
- when cic:/path/t/ exists *)
- if Pcre.pmatch ~rex:theory_RE localpart then
- candidates_found := (uri,localpart) :: !candidates_found
- else
- store_obj localpart
+ Hashtbl.replace objs basepart (oldflags ++ newflags)
+ in
+ (* Variables used in backward compatibility code to map
+ theory:/path/t.theory into theory:/path/t/index.theory
+ when cic:/path/t/ exists *)
+ let the_candidate_for_remapping =
+ (* CSC: Here I am making a strong assumption: the pattern
+ can be only of the form [^:]*:/path where path is
+ NOT a regular expression *)
+ "theory:" ^ Pcre.replace ~rex:(Pcre.regexp "[^:]*:") pat
+ in
+ let index_not_generated_yet = ref true in
+ let valid_candidates = ref [] in
+ let candidates_found = ref [] in
+
+ (*(Lazy.force cic_map) # iter*)
- end
+ (* depending on prefetch *)
+ let if_prefetch if_is if_not =
+ if is_prefetch_on() then if_is else if_not
+ in
+
+ let iter_on_right_thing = if_prefetch
+ (fun f -> List.iter (fun k -> f k "")
+ (Tree.ls_path regexp (deref_if_some uri_tree)))
+ (fun f -> (Lazy.force cic_map) # iter f)
+ in
+ let calculate_localpart = if_prefetch
+ (fun uri -> uri)
+ (fun uri -> Pcre.replace ~rex:dir_RE uri)
+ in
+ let check_if_x_RE = if_prefetch
+ (fun x_RE uri -> true)
+ (fun x_RE uri -> Pcre.pmatch ~rex:x_RE uri)
+ in
+ let check_if_dir_RE = check_if_x_RE dir_RE in
+ let check_if_obj_RE = check_if_x_RE obj_RE in
+
+ iter_on_right_thing
+ (fun key _ ->
+ (* we work in two ways:
+ * 1 iteration on the whole map
+ * 2 tree visit
+ *
+ * Since in the first case 'key' is a complete uri, while
+ * in the second case it is only the subtree rooted in the
+ * query regex, we must relay only on the localpath.
+ *
+ * example:
+ * query::= cic:/aaa/bbb/
+ *
+ * answer1 ::= the whole map
+ *
+ * aswer2 ::= [ "ccc/"; "c1.something"] where
+ * cic:/aaa/bbb/ccc/ and cic:/aaa/bbb/c1.something
+ * are the (partials) uri that matched query
+ *
+ * after the localpath extracion we have more uris in the first case,
+ * but at least the are all rooted in the same node.
+ *
+ * the Tree.get_frontier may be changed to return the same stuff as
+ * the map iteration+localpath extraction does, but I hope it is not
+ * necessary
+ *)
+ match key with
+ | uri when looking_for_dir && check_if_dir_RE uri ->
+ (* directory hit *)
+ let localpart = calculate_localpart uri in
+ if Pcre.pmatch ~rex:no_slashes_RE localpart then
+ begin
+ (* Backward compatibility code to map
+ theory:/path/t.theory into theory:/path/t/index.theory
+ when cic:/path/t/ exists *)
+ if Pcre.pmatch ~rex:theory_RE localpart then
+ candidates_found := localpart :: !candidates_found
else
+ store_obj localpart
+ end
+ else
+ begin
+ store_dir localpart ;
+ if Pcre.pmatch localpart ~rex:dir_local_RE then
begin
- store_dir localpart ;
- if String.sub uri 0 3 = "cic" then
- let dir_found = ref "" in
- let _ =
- Pcre.substitute_first ~rex:dir_RE2
- ~subst:(fun s -> dir_found := s; "") uri in
- let dir =
- "theory" ^ String.sub !dir_found 3
- (String.length !dir_found - 3) ^ ".theory" in
- if not (List.mem dir !valid_candidates) then
- valid_candidates := dir::!valid_candidates
+ let valid =
+ Pcre.replace ~rex:first_comp_RE localpart ^ ".theory"
+ in
+ if not (List.mem valid !valid_candidates) then
+ valid_candidates := valid::!valid_candidates
end
- | uri when (not looking_for_dir) && Pcre.pmatch ~rex:obj_RE uri ->
- (* file hit *)
- store_obj (Pcre.replace ~rex:til_slash_RE uri)
- | uri -> (* miss *)
- if !index_not_generated_yet &&
- Pcre.pmatch ~rex:orig_theory_RE uri
- then
- (index_not_generated_yet := false ;
- store_obj "index.theory"));
- List.iter
- (fun (uri,localpart) ->
- if not (List.mem uri !valid_candidates) then
- store_obj localpart
- ) !candidates_found ;
- let ls_items = ref [] in
- StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs;
- Http_getter_misc.hashtbl_sorted_iter
- (fun uri (annflag, typesflag, bodyflag, treeflag) ->
- ls_items :=
- Ls_object {
- uri = uri; ann = annflag;
- types = typesflag; body = bodyflag; proof_tree = treeflag
- } :: !ls_items)
- objs;
- List.rev !ls_items
- end
+ end
+ | uri when (not looking_for_dir) && check_if_obj_RE uri ->
+ (* file hit *)
+ store_obj (Pcre.replace ~rex:til_slash_RE uri)
+ | uri -> ());
+(*
+ (* miss *)
+ if !index_not_generated_yet &&
+ Pcre.pmatch ~rex:orig_theory_RE uri
+ then
+ (index_not_generated_yet := false ;
+ store_obj "index.theory"));
+ *)
+ store_obj "index.theory";
+ List.iter
+ (fun localpart ->
+ if not (List.mem localpart !valid_candidates) then
+ store_obj localpart
+ ) !candidates_found ;
+ let ls_items = ref [] in
+ StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs;
+ Http_getter_misc.hashtbl_sorted_iter
+ (fun uri (annflag, typesflag, bodyflag, treeflag) ->
+ ls_items :=
+ Ls_object {
+ uri = uri; ann = annflag;
+ types = typesflag; body = bodyflag; proof_tree = treeflag
+ } :: !ls_items)
+ objs;
+ List.rev !ls_items
+ end
- (* Shorthands from now on *)
+(* Shorthands from now on *)
let getxml' uri = getxml (UriManager.string_of_uri uri)
let resolve' uri = resolve (UriManager.string_of_uri uri)
let register' uri url = register ~uri:(UriManager.string_of_uri uri) ~url
+let unregister' uri = unregister (UriManager.string_of_uri uri)
+
+let sync_dump_file () =
+ if is_prefetch_on () then
+ dump_tree ()
+
let init () =
Http_getter_logger.set_log_level
(Helm_registry.get_opt_default Helm_registry.get_int 1 "getter.log_level");
Http_getter_logger.set_log_file
(Helm_registry.get_opt Helm_registry.get_string "getter.log_file");
- Http_getter_env.reload ()
+ Http_getter_env.reload ();
+ let is_prefetch_set =
+ Helm_registry.get_opt_default Helm_registry.get_bool false "getter.prefetch"
+ in
+ if is_prefetch_set then
+ ignore (Thread.create sync_with_map ())