From 1d15266bc05ac3524d721b66933db9a49e4c24aa Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Wed, 27 Apr 2005 13:42:31 +0000 Subject: [PATCH] getter with in memory tree of URIs --- helm/ocaml/getter/.depend | 20 +- helm/ocaml/getter/Makefile | 2 + helm/ocaml/getter/http_getter.ml | 367 +++++++++++++++++--------- helm/ocaml/getter/http_getter.mli | 9 +- helm/ocaml/getter/http_getter_env.ml | 8 +- helm/ocaml/getter/http_getter_env.mli | 3 + helm/ocaml/getter/http_getter_md5.ml | 42 +++ helm/ocaml/getter/http_getter_md5.mli | 2 + helm/ocaml/getter/tree.ml | 76 ++++++ helm/ocaml/getter/tree.mli | 12 + 10 files changed, 402 insertions(+), 139 deletions(-) create mode 100644 helm/ocaml/getter/http_getter_md5.ml create mode 100644 helm/ocaml/getter/http_getter_md5.mli create mode 100644 helm/ocaml/getter/tree.ml create mode 100644 helm/ocaml/getter/tree.mli diff --git a/helm/ocaml/getter/.depend b/helm/ocaml/getter/.depend index 9032552a4..ecf65ea6d 100644 --- a/helm/ocaml/getter/.depend +++ b/helm/ocaml/getter/.depend @@ -2,6 +2,8 @@ http_getter_env.cmi: http_getter_types.cmo http_getter_common.cmi: http_getter_types.cmo http_getter_cache.cmi: http_getter_types.cmo http_getter.cmi: http_getter_types.cmo +tree.cmo: tree.cmi +tree.cmx: tree.cmi clientHTTP.cmo: clientHTTP.cmi clientHTTP.cmx: clientHTTP.cmi http_getter_logger.cmo: http_getter_logger.cmi @@ -14,6 +16,8 @@ http_getter_env.cmo: http_getter_types.cmo http_getter_misc.cmi \ http_getter_logger.cmi http_getter_const.cmi http_getter_env.cmi http_getter_env.cmx: http_getter_types.cmx http_getter_misc.cmx \ http_getter_logger.cmx http_getter_const.cmx http_getter_env.cmi +http_getter_md5.cmo: http_getter_env.cmi http_getter_md5.cmi +http_getter_md5.cmx: http_getter_env.cmx http_getter_md5.cmi http_getter_common.cmo: http_getter_types.cmo http_getter_misc.cmi \ http_getter_env.cmi http_getter_common.cmi http_getter_common.cmx: http_getter_types.cmx http_getter_misc.cmx \ @@ -28,11 +32,11 @@ http_getter_cache.cmo: http_getter_types.cmo http_getter_misc.cmi \ http_getter_cache.cmx: http_getter_types.cmx http_getter_misc.cmx \ http_getter_logger.cmx http_getter_env.cmx http_getter_common.cmx \ http_getter_cache.cmi -http_getter.cmo: http_getter_types.cmo http_getter_misc.cmi \ - http_getter_map.cmi http_getter_logger.cmi http_getter_env.cmi \ - http_getter_const.cmi http_getter_common.cmi http_getter_cache.cmi \ - clientHTTP.cmi http_getter.cmi -http_getter.cmx: http_getter_types.cmx http_getter_misc.cmx \ - http_getter_map.cmx http_getter_logger.cmx http_getter_env.cmx \ - http_getter_const.cmx http_getter_common.cmx http_getter_cache.cmx \ - clientHTTP.cmx http_getter.cmi +http_getter.cmo: tree.cmi http_getter_types.cmo http_getter_misc.cmi \ + http_getter_md5.cmi http_getter_map.cmi http_getter_logger.cmi \ + http_getter_env.cmi http_getter_const.cmi http_getter_common.cmi \ + http_getter_cache.cmi clientHTTP.cmi http_getter.cmi +http_getter.cmx: tree.cmx http_getter_types.cmx http_getter_misc.cmx \ + http_getter_md5.cmx http_getter_map.cmx http_getter_logger.cmx \ + http_getter_env.cmx http_getter_const.cmx http_getter_common.cmx \ + http_getter_cache.cmx clientHTTP.cmx http_getter.cmi diff --git a/helm/ocaml/getter/Makefile b/helm/ocaml/getter/Makefile index 99529c2d4..6827e2bb4 100644 --- a/helm/ocaml/getter/Makefile +++ b/helm/ocaml/getter/Makefile @@ -6,11 +6,13 @@ REQUIRES = \ helm-pxp helm-thread helm-logger helm-urimanager helm-registry INTERFACE_FILES = \ + tree.mli \ clientHTTP.mli \ http_getter_logger.mli \ http_getter_misc.mli \ http_getter_const.mli \ http_getter_env.mli \ + http_getter_md5.mli \ http_getter_common.mli \ http_getter_map.mli \ http_getter_cache.mli \ diff --git a/helm/ocaml/getter/http_getter.ml b/helm/ocaml/getter/http_getter.ml index f7ca2388c..4f80006c6 100644 --- a/helm/ocaml/getter/http_getter.ml +++ b/helm/ocaml/getter/http_getter.ml @@ -67,10 +67,51 @@ let rdf_map = let xsl_map = lazy (new Http_getter_map.map (Lazy.force Http_getter_env.xsl_dbm)) +let uri_tree = ref None +let deref_if_some r = + match !r with + | None -> assert false + | Some x -> x +let is_prefetch_on () = + match !uri_tree with None -> false | Some _ -> true + +let dump_tree () = + let path = Lazy.force Http_getter_env.dump_file in + Tree.save_to_disk path (deref_if_some uri_tree); + Http_getter_md5.create_hash [ + (Lazy.force Http_getter_env.cic_dbm_real); + path ] + +let load_tree () = + if not (Http_getter_md5.check_hash ()) then + assert false + else + uri_tree := Some (Tree.load_from_disk + (Lazy.force Http_getter_env.dump_file)) + +let sync_with_map () = + if not (Http_getter_md5.check_hash ()) then begin + let tree = ref (Some Tree.empty_tree) in + Http_getter_logger.log "Updating cic map dump..."; + let t = Unix.time () in + (Lazy.force cic_map)#iter + (fun k _ -> + tree := Some (Tree.add_uri k (deref_if_some tree))); + uri_tree := !tree; + Http_getter_logger.log + (sprintf "done in %.0f sec" (Unix.time () -. t)); + dump_tree () + end else begin + Http_getter_logger.log "Cic map dump is up to date!"; + load_tree () (* XXX TASSI: race condition here *) + end + let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ] let close_maps () = List.iter (fun m -> (Lazy.force m) # close) maps let clear_maps () = List.iter (fun m -> (Lazy.force m) # clear) maps -let sync_maps () = List.iter (fun m -> (Lazy.force m) # sync) maps +let sync_maps () = + List.iter (fun m -> (Lazy.force m) # sync) maps; + sync_with_map () let map_of_uri = function | uri when is_cic_uri uri -> Lazy.force cic_map @@ -270,14 +311,22 @@ let register ~uri ~url = if remote () then register_remote ~uri ~url else - (map_of_uri uri)#add uri url + begin + (map_of_uri uri)#add uri url; + if is_prefetch_on () then + uri_tree := Some (Tree.add_uri uri (deref_if_some uri_tree)) + end let unregister uri = if remote () then unregister_remote uri else try - (map_of_uri uri)#remove uri + begin + (map_of_uri uri)#remove uri; + if is_prefetch_on () then + uri_tree := Some (Tree.remove_uri uri (deref_if_some uri_tree)) + end with Key_not_found _ -> () let update ?(logger = fun _ -> ()) () = @@ -392,144 +441,208 @@ let getallrdfuris classs = in return_uris (Lazy.force rdf_map) filter -let ls = - let (++) (oldann, oldtypes, oldbody, oldtree) - (newann, newtypes, newbody, newtree) = - ((if newann > oldann then newann else oldann), - (if newtypes > oldtypes then newtypes else oldtypes), - (if newbody > oldbody then newbody else oldbody), - (if newtree > oldtree then newtree else oldtree)) - in - let basepart_RE = - Pcre.regexp - "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" - in - let (types_RE, types_ann_RE, body_RE, body_ann_RE, - proof_tree_RE, proof_tree_ann_RE, trailing_slash_RE, theory_RE) = - (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", - Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", - Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$", - Pcre.regexp "/$", Pcre.regexp "\\.theory$") - in - let (slash_RE, til_slash_RE, no_slashes_RE) = - (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") - in - fun regexp -> - if remote () then - ls_remote regexp - else begin - let looking_for_dir = Pcre.pmatch ~rex:trailing_slash_RE regexp in - let pat = Pcre.replace ~rex:trailing_slash_RE ("^" ^ regexp) in - let (dir_RE, dir_RE2, obj_RE, orig_theory_RE) = - Pcre.regexp (pat ^ "/"), Pcre.regexp (pat ^ "/[^/]*"), - Pcre.regexp (pat ^ "(\\.|$)"), Pcre.regexp (pat ^ ".theory$") - in - let dirs = ref StringSet.empty in - let objs = Hashtbl.create 17 in - let store_dir d = - dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs - in - let store_obj o = - let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in - let no_flags = false, No, No, No in - let oldflags = - try - Hashtbl.find objs basepart - with Not_found -> (* no ann, no types, no body, no proof tree *) - no_flags - in - let newflags = - match o with - | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) - | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) - | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) - | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) - | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) - | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) - | s -> no_flags - in - Hashtbl.replace objs basepart (oldflags ++ newflags) + +let (++) (oldann, oldtypes, oldbody, oldtree) + (newann, newtypes, newbody, newtree) = + ((if newann > oldann then newann else oldann), + (if newtypes > oldtypes then newtypes else oldtypes), + (if newbody > oldbody then newbody else oldbody), + (if newtree > oldtree then newtree else oldtree)) + +let (types_RE, types_ann_RE, body_RE, body_ann_RE, + proof_tree_RE, proof_tree_ann_RE, trailing_slash_RE, theory_RE) = + (Pcre.regexp "\\.types$", Pcre.regexp "\\.types\\.ann$", + Pcre.regexp "\\.body$", Pcre.regexp "\\.body\\.ann$", + Pcre.regexp "\\.proof_tree$", Pcre.regexp "\\.proof_tree\\.ann$", + Pcre.regexp "/$", Pcre.regexp "\\.theory$") + +let basepart_RE = + Pcre.regexp + "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$" + +let (slash_RE, til_slash_RE, no_slashes_RE) = + (Pcre.regexp "/", Pcre.regexp "^.*/", Pcre.regexp "^[^/]*$") + +let ls regexp = + if remote () then + ls_remote regexp + else begin + let looking_for_dir = Pcre.pmatch ~rex:trailing_slash_RE regexp in + let pat = Pcre.replace ~rex:trailing_slash_RE ("^" ^ regexp) in + let (dir_RE, dir_local_RE, obj_RE, first_comp_RE) = + Pcre.regexp (pat ^ "/"), Pcre.regexp "[^/]+/[^/]*", + Pcre.regexp (pat ^ "(\\.|$)"), Pcre.regexp "/.*" + in + let toplevel_theory = + match List.rev (Pcre.split ~rex:slash_RE pat) with + | dir :: _ -> Some (dir ^ ".theory") + | _ -> None + in + let dirs = ref StringSet.empty in + let objs = Hashtbl.create 17 in + let store_dir d = + dirs := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !dirs + in + let store_obj o = + let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in + let no_flags = false, No, No, No in + let oldflags = + try + Hashtbl.find objs basepart + with Not_found -> (* no ann, no types, no body, no proof tree *) + no_flags in - (* Variables used in backward compatibility code to map - theory:/path/t.theory into theory:/path/t/index.theory - when cic:/path/t/ exists *) - let the_candidate_for_remapping = - (* CSC: Here I am making a strong assumption: the pattern - can be only of the form [^:]*:/path where path is - NOT a regular expression *) - "theory:" ^ Pcre.replace ~rex:(Pcre.regexp "[^:]*:") pat + let newflags = + match o with + | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No) + | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No) + | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No) + | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No) + | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes) + | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann) + | s -> no_flags in - let index_not_generated_yet = ref true in - let valid_candidates = ref [] in - let candidates_found = ref [] in - (Lazy.force cic_map) # iter - (* BLEARGH Dbm module lacks support for fold-like functions *) - (fun key _ -> - match key with - | uri when looking_for_dir && Pcre.pmatch ~rex:dir_RE uri -> - (* directory hit *) - let localpart = Pcre.replace ~rex:dir_RE uri in - if Pcre.pmatch ~rex:no_slashes_RE localpart then - begin - (* Backward compatibility code to map - theory:/path/t.theory into theory:/path/t/index.theory - when cic:/path/t/ exists *) - if Pcre.pmatch ~rex:theory_RE localpart then - candidates_found := (uri,localpart) :: !candidates_found - else - store_obj localpart + Hashtbl.replace objs basepart (oldflags ++ newflags) + in + (* Variables used in backward compatibility code to map + theory:/path/t.theory into theory:/path/t/index.theory + when cic:/path/t/ exists *) + let the_candidate_for_remapping = + (* CSC: Here I am making a strong assumption: the pattern + can be only of the form [^:]*:/path where path is + NOT a regular expression *) + "theory:" ^ Pcre.replace ~rex:(Pcre.regexp "[^:]*:") pat + in + let index_not_generated_yet = ref true in + let valid_candidates = ref [] in + let candidates_found = ref [] in + + (*(Lazy.force cic_map) # iter*) - end + (* depending on prefetch *) + let if_prefetch if_is if_not = + if is_prefetch_on() then if_is else if_not + in + + let iter_on_right_thing = if_prefetch + (fun f -> List.iter (fun k -> f k "") + (Tree.ls_path regexp (deref_if_some uri_tree))) + (fun f -> (Lazy.force cic_map) # iter f) + in + let calculate_localpart = if_prefetch + (fun uri -> uri) + (fun uri -> Pcre.replace ~rex:dir_RE uri) + in + let check_if_x_RE = if_prefetch + (fun x_RE uri -> true) + (fun x_RE uri -> Pcre.pmatch ~rex:x_RE uri) + in + let check_if_dir_RE = check_if_x_RE dir_RE in + let check_if_obj_RE = check_if_x_RE obj_RE in + + iter_on_right_thing + (fun key _ -> + (* we work in two ways: + * 1 iteration on the whole map + * 2 tree visit + * + * Since in the first case 'key' is a complete uri, while + * in the second case it is only the subtree rooted in the + * query regex, we must relay only on the localpath. + * + * example: + * query::= cic:/aaa/bbb/ + * + * answer1 ::= the whole map + * + * aswer2 ::= [ "ccc/"; "c1.something"] where + * cic:/aaa/bbb/ccc/ and cic:/aaa/bbb/c1.something + * are the (partials) uri that matched query + * + * after the localpath extracion we have more uris in the first case, + * but at least the are all rooted in the same node. + * + * the Tree.get_frontier may be changed to return the same stuff as + * the map iteration+localpath extraction does, but I hope it is not + * necessary + *) + match key with + | uri when looking_for_dir && check_if_dir_RE uri -> + (* directory hit *) + let localpart = calculate_localpart uri in + if Pcre.pmatch ~rex:no_slashes_RE localpart then + begin + (* Backward compatibility code to map + theory:/path/t.theory into theory:/path/t/index.theory + when cic:/path/t/ exists *) + if Pcre.pmatch ~rex:theory_RE localpart then + candidates_found := localpart :: !candidates_found else + store_obj localpart + end + else + begin + store_dir localpart ; + if Pcre.pmatch localpart ~rex:dir_local_RE then begin - store_dir localpart ; - if String.sub uri 0 3 = "cic" then - let dir_found = ref "" in - let _ = - Pcre.substitute_first ~rex:dir_RE2 - ~subst:(fun s -> dir_found := s; "") uri in - let dir = - "theory" ^ String.sub !dir_found 3 - (String.length !dir_found - 3) ^ ".theory" in - if not (List.mem dir !valid_candidates) then - valid_candidates := dir::!valid_candidates + let valid = + Pcre.replace ~rex:first_comp_RE localpart ^ ".theory" + in + if not (List.mem valid !valid_candidates) then + valid_candidates := valid::!valid_candidates end - | uri when (not looking_for_dir) && Pcre.pmatch ~rex:obj_RE uri -> - (* file hit *) - store_obj (Pcre.replace ~rex:til_slash_RE uri) - | uri -> (* miss *) - if !index_not_generated_yet && - Pcre.pmatch ~rex:orig_theory_RE uri - then - (index_not_generated_yet := false ; - store_obj "index.theory")); - List.iter - (fun (uri,localpart) -> - if not (List.mem uri !valid_candidates) then - store_obj localpart - ) !candidates_found ; - let ls_items = ref [] in - StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; - Http_getter_misc.hashtbl_sorted_iter - (fun uri (annflag, typesflag, bodyflag, treeflag) -> - ls_items := - Ls_object { - uri = uri; ann = annflag; - types = typesflag; body = bodyflag; proof_tree = treeflag - } :: !ls_items) - objs; - List.rev !ls_items - end + end + | uri when (not looking_for_dir) && check_if_obj_RE uri -> + (* file hit *) + store_obj (Pcre.replace ~rex:til_slash_RE uri) + | uri -> ()); +(* + (* miss *) + if !index_not_generated_yet && + Pcre.pmatch ~rex:orig_theory_RE uri + then + (index_not_generated_yet := false ; + store_obj "index.theory")); + *) + store_obj "index.theory"; + List.iter + (fun localpart -> + if not (List.mem localpart !valid_candidates) then + store_obj localpart + ) !candidates_found ; + let ls_items = ref [] in + StringSet.iter (fun dir -> ls_items := Ls_section dir :: !ls_items) !dirs; + Http_getter_misc.hashtbl_sorted_iter + (fun uri (annflag, typesflag, bodyflag, treeflag) -> + ls_items := + Ls_object { + uri = uri; ann = annflag; + types = typesflag; body = bodyflag; proof_tree = treeflag + } :: !ls_items) + objs; + List.rev !ls_items + end - (* Shorthands from now on *) +(* Shorthands from now on *) let getxml' uri = getxml (UriManager.string_of_uri uri) let resolve' uri = resolve (UriManager.string_of_uri uri) let register' uri url = register ~uri:(UriManager.string_of_uri uri) ~url +let unregister' uri = unregister (UriManager.string_of_uri uri) + +let sync_dump_file () = + if is_prefetch_on () then + dump_tree () + let init () = Http_getter_logger.set_log_level (Helm_registry.get_opt_default Helm_registry.get_int 1 "getter.log_level"); Http_getter_logger.set_log_file (Helm_registry.get_opt Helm_registry.get_string "getter.log_file"); - Http_getter_env.reload () + Http_getter_env.reload (); + let is_prefetch_set = + Helm_registry.get_opt_default Helm_registry.get_bool false "getter.prefetch" + in + if is_prefetch_set then + ignore (Thread.create sync_with_map ()) diff --git a/helm/ocaml/getter/http_getter.mli b/helm/ocaml/getter/http_getter.mli index c7a2c5c2a..b58c3dcf4 100644 --- a/helm/ocaml/getter/http_getter.mli +++ b/helm/ocaml/getter/http_getter.mli @@ -64,9 +64,10 @@ val ls: string -> ls_item list (** {2 Shorthands} *) -val getxml' : UriManager.uri -> string -val resolve' : UriManager.uri -> string -val register' : UriManager.uri -> string -> unit +val getxml' : UriManager.uri -> string +val resolve' : UriManager.uri -> string +val register' : UriManager.uri -> string -> unit +val unregister' : UriManager.uri -> unit (** {2 Misc} *) @@ -75,3 +76,5 @@ val update_from_one_server: ?logger:logger_callback -> string -> unit val has_server: int -> bool (* does a server with a given position exists? *) val init: unit -> unit + (** cal this at exit() *) +val sync_dump_file: unit -> unit diff --git a/helm/ocaml/getter/http_getter_env.ml b/helm/ocaml/getter/http_getter_env.ml index a0da4b676..575207057 100644 --- a/helm/ocaml/getter/http_getter_env.ml +++ b/helm/ocaml/getter/http_getter_env.ml @@ -38,9 +38,13 @@ let version = Http_getter_const.version let servers_file = lazy ( Helm_registry.get_opt Helm_registry.get "getter.servers_file") let cic_dbm = lazy (Helm_registry.get "getter.maps_dir" ^ "/cic_db") +let cic_dbm_real = lazy (Helm_registry.get "getter.maps_dir" ^ "/cic_db.pag") let nuprl_dbm = lazy (Helm_registry.get "getter.maps_dir" ^ "/nuprl_db") let rdf_dbm = lazy (Helm_registry.get "getter.maps_dir" ^ "/rdf_db") let xsl_dbm = lazy (Helm_registry.get "getter.maps_dir" ^ "/xsl_db") +let dump_file = lazy (Helm_registry.get "getter.maps_dir" ^ + "/cic_db_tree.dump") +let prefetch = lazy (Helm_registry.get_bool "getter.prefetch") let xml_index = lazy ( Helm_registry.get_opt_default Helm_registry.get "index.txt" "getter.xml_indexname") @@ -139,6 +143,8 @@ cic_dir:\t%s nuprl_dir:\t%s rdf_dir:\t%s dtd_dir:\t%s +dump_file:\t%s +prefetch:\t%b servers_file:\t%s host:\t\t%s port:\t\t%d @@ -154,7 +160,7 @@ log_level:\t%d (Lazy.force xsl_dbm) (Lazy.force xml_index) (Lazy.force rdf_index) (Lazy.force xsl_index) (Lazy.force cic_dir) (Lazy.force nuprl_dir) (Lazy.force rdf_dir) - (Lazy.force dtd_dir) + (Lazy.force dtd_dir) (Lazy.force dump_file) (Lazy.force prefetch) (match Lazy.force servers_file with | None -> "no servers file" | Some servers_file -> servers_file) diff --git a/helm/ocaml/getter/http_getter_env.mli b/helm/ocaml/getter/http_getter_env.mli index 5e5a0496c..7d2d1632e 100644 --- a/helm/ocaml/getter/http_getter_env.mli +++ b/helm/ocaml/getter/http_getter_env.mli @@ -35,9 +35,12 @@ val version : string (* getter version *) (* {2 environment gathered data} *) val cic_dbm : string lazy_t (* XML map DBM file for CIC *) +val cic_dbm_real : string lazy_t (* XML map DBM file for CIC ^ ".pag"*) val nuprl_dbm : string lazy_t (* XML map DBM file for NuPRL *) val rdf_dbm : string lazy_t (* RDF map DBM file *) val xsl_dbm : string lazy_t (* XSL map DBM file *) +val dump_file : string lazy_t (* CIC DBM tree dump file *) +val prefetch : bool lazy_t (* prefetch URIs tree? *) val xml_index : string lazy_t (* XMLs' index *) val rdf_index : string lazy_t (* RDFs' index *) val xsl_index : string lazy_t (* XSLTs' index *) diff --git a/helm/ocaml/getter/http_getter_md5.ml b/helm/ocaml/getter/http_getter_md5.ml new file mode 100644 index 000000000..ea9116fde --- /dev/null +++ b/helm/ocaml/getter/http_getter_md5.ml @@ -0,0 +1,42 @@ +(* md5 helpers *) +let md5_create files reg = + let opts = "md5sum " ^ String.concat " " files ^ " > " ^ reg in + let rc = Unix.system opts in + match rc with + | Unix.WEXITED 0 -> () + | Unix.WEXITED i -> raise (Failure "Unable to create the md5sums file") + | _ -> assert false + +let md5_check reg = + let opts = "md5sum -c " ^ reg in + let rc = Unix.system opts in + match rc with + | Unix.WEXITED 0 -> true + | Unix.WEXITED _ -> false + | _ -> assert false + +let reg () = (Lazy.force Http_getter_env.dump_file) ^ ".md5" + +(* sync checks *) + +(* maybe should be useda as a fallback *) +(* +let is_in_sync_date () = + let get_last_mod_date f = + try + (Unix.stat f).Unix.st_mtime + with + | Unix.Unix_error (Unix.ENOENT, _, _)-> 0.0 + in + let map_date = get_last_mod_date (Lazy.force Http_getter_env.cic_dbm_real) in + let dump_date = get_last_mod_date (Lazy.force Http_getter_env.dump_file) in + dump_date < map_date +*) + +let check_hash () = + md5_check (reg ()) + +let create_hash files = + md5_create files (reg ()) + + diff --git a/helm/ocaml/getter/http_getter_md5.mli b/helm/ocaml/getter/http_getter_md5.mli new file mode 100644 index 000000000..c787843d4 --- /dev/null +++ b/helm/ocaml/getter/http_getter_md5.mli @@ -0,0 +1,2 @@ +val check_hash: unit -> bool +val create_hash: string list -> unit diff --git a/helm/ocaml/getter/tree.ml b/helm/ocaml/getter/tree.ml new file mode 100644 index 000000000..6b2fc6fa6 --- /dev/null +++ b/helm/ocaml/getter/tree.ml @@ -0,0 +1,76 @@ +(* to avoid the need of -rectypes *) +type tree = Foo of (string * tree) list + +let rec add_path t l = + match l with + | [] -> t (* no more path to add *) + | name::tl -> add_path_aux t name tl + +and add_path_aux t name tl = + match t with + | Foo [] -> Foo [(name, add_path (Foo []) tl)] + | Foo ((n, t')::bro) when n = name -> Foo ((n, (add_path t' tl))::bro) + | Foo (((n, t') as x)::bro) -> + let tmp = add_path_aux (Foo bro) name tl in + match tmp with Foo y -> Foo (x::y) + +let rec get_frontier t l = + match l with + | [] -> (match t with Foo bla -> + List.map (function (x,Foo []) -> x | (x,_) -> (x^"/")) bla) + | name::tl -> get_frontier_aux t name tl + +and get_frontier_aux (t:tree) name tl = + match t with + | Foo [] -> [] + | Foo ((n, t')::bro) when Pcre.pmatch ~pat:("^" ^ name ^ "$") n -> + (* since regex are no more "unique" matches, we have to continue + * searching on the brothers. + *) + get_frontier t' tl @ get_frontier_aux (Foo bro) name tl + | Foo (_::bro) -> get_frontier_aux (Foo bro) name tl + +let rec remove_path t path = + match path with + | [] -> t + | name::tl -> remove_path_aux t name tl + +and remove_path_aux t name tl = + match t with + | Foo [] -> assert false + | Foo ((n, t')::bro) when n = name -> + let tmp = remove_path t' tl in + (match tmp with + | Foo [] -> Foo bro + | Foo x -> Foo ((n, Foo x)::bro)) + | Foo (((n, t') as x)::bro) -> + let tmp = remove_path_aux (Foo bro) name tl in + match tmp with Foo y -> Foo (x::y) + +let split_RE = Pcre.regexp "/" + +let empty_tree = Foo [] + +let add_uri suri t = + let s = (Pcre.split ~rex:split_RE suri) in + add_path t s + +let ls_path path t = + let s = (Pcre.split ~rex:split_RE path) in + get_frontier t s + +let remove_uri suri t = + let s = (Pcre.split ~rex:split_RE suri) in + remove_path t s + +let save_to_disk path t = + let o = open_out path in + Marshal.to_channel o t []; + close_out o + +let load_from_disk path = + let i = open_in path in + let t = Marshal.from_channel i in + close_in i; + t + diff --git a/helm/ocaml/getter/tree.mli b/helm/ocaml/getter/tree.mli new file mode 100644 index 000000000..76118a142 --- /dev/null +++ b/helm/ocaml/getter/tree.mli @@ -0,0 +1,12 @@ +type tree + +val empty_tree: tree + +val add_uri: string -> tree -> tree +val remove_uri: string -> tree -> tree + +val ls_path: string -> tree -> string list + +val save_to_disk: string -> tree -> unit +val load_from_disk: string -> tree + -- 2.39.2