let _ = Helm_registry.load_from "extractor.conf.xml"
-let _ = Unix.system ("mkdir -p " ^ (Helm_registry.get "tmp.dir"))
-let _ = Http_getter.init ()
-(*let _ = Http_getter.update () *)
-let dbd =
- Mysql.quick_connect
- ~host:(Helm_registry.get "db.host")
- ~user:(Helm_registry.get "db.user")
- ~database:(Helm_registry.get "db.database") ()
+let usage () =
+ prerr_endline "
-let _ =
+!! This binary should not be called by hand, use the extractor_manager. !!
+
+usage: ./extractor[.opt] path owner
+
+path: the path for the getter maps
+owner: the owner of the tables to update
+
+"
+
+let _ =
try
- MetadataTypes.ownerize_tables Sys.argv.(1)
- with Invalid_argument _ -> MetadataTypes.ownerize_tables "NEW"
+ let _ = Sys.argv.(2), Sys.argv.(1) in
+ if Sys.argv.(1) = "-h"||Sys.argv.(1) = "-help"||Sys.argv.(1) = "--help" then
+ begin
+ usage ();
+ exit 1
+ end
+ with
+ Invalid_argument _ -> usage (); exit 1
-let uri_RE = Str.regexp ".*\\(ind\\|var\\|con\\)$"
-
-let create_all () =
- let obj_tbl = MetadataTypes.obj_tbl () in
- let sort_tbl = MetadataTypes.sort_tbl () in
- let rel_tbl = MetadataTypes.rel_tbl () in
- let name_tbl = MetadataTypes.name_tbl () in
- let count_tbl = MetadataTypes.count_tbl () in
- let tbls = [
- (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ;
- (name_tbl,`ObjectName) ; (count_tbl,`Count) ]
- in
- let statements =
- (SqlStatements.create_tables tbls) @ (SqlStatements.create_indexes tbls)
- in
- List.iter (fun statement ->
- try
- ignore (Mysql.exec dbd statement)
- with
- exn ->
- let status = Mysql.status dbd in
- match status with
- | Mysql.StatusError Mysql.Table_exists_error -> ()
- | Mysql.StatusError _ -> raise exn
- | _ -> ()
- ) statements
+let owner = Sys.argv.(2)
+let path = Sys.argv.(1)
-let drop_all () =
- let obj_tbl = MetadataTypes.obj_tbl () in
- let sort_tbl = MetadataTypes.sort_tbl () in
- let rel_tbl = MetadataTypes.rel_tbl () in
- let name_tbl = MetadataTypes.name_tbl () in
- let count_tbl = MetadataTypes.count_tbl () in
- let tbls = [
- (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ;
- (name_tbl,`ObjectName) ; (count_tbl,`Count) ]
+let main () =
+ print_endline (Printf.sprintf "%d alive on path:%s owner:%s"
+ (Unix.getpid()) path owner);
+ Helm_registry.set "tmp.dir" path;
+ Http_getter.init ();
+ let dbd =
+ Mysql.quick_connect
+ ~host:(Helm_registry.get "db.host")
+ ~user:(Helm_registry.get "db.user")
+ ~database:(Helm_registry.get "db.database") ()
in
- let statements =
- (SqlStatements.drop_tables tbls) @ (SqlStatements.drop_indexes tbls)
+ MetadataTypes.ownerize_tables owner;
+ let uris =
+ let ic = open_in (path ^ "/todo") in
+ let acc = ref [] in
+ (try
+ while true do
+ let l = input_line ic in
+ acc := l :: !acc
+ done
+ with
+ End_of_file -> ());
+ close_in ic;
+ !acc
in
- List.iter (fun statement ->
- try
- ignore (Mysql.exec dbd statement)
- with Mysql.Error _ as exn ->
- match Mysql.errno dbd with
- | Mysql.Bad_table_error
- | Mysql.No_such_index | Mysql.No_such_table -> ()
- | _ -> raise exn
- ) statements
-
-
-let main () =
- drop_all ();
- create_all ();
- let uris = Http_getter.getalluris () in
- let uris = List.filter (fun u -> Str.string_match uri_RE u 0) uris in
let len = float_of_int (List.length uris) in
let i = ref 0 in
+ let magic = 45 in
List.iter (fun u ->
incr i;
let perc = ((float_of_int !i) /. len *. 100.0) in
let l = String.length u in
let short =
- if l < 60 then
- u ^ String.make (63 - l) ' '
+ if l < magic then
+ u ^ String.make (magic + 3 - l) ' '
else
- "..." ^ String.sub u (l - 60) 60
+ "..." ^ String.sub u (l - magic) magic
in
- Printf.printf "\rIndexing (%3.1f%%): %s" perc short;
+ Printf.printf "%d (%d of %.0f = %3.1f%%): %s\n"
+ (Unix.getpid ()) !i len perc short;
flush stdout;
let uri = UriManager.uri_of_string u in
MetadataDb.index_obj ~dbd ~uri)
- uris
+ uris;
+ print_string "END "; Unix.system "date"
;;
main ()
+
--- /dev/null
+(* HELPERS *)
+let create_all dbd =
+ let obj_tbl = MetadataTypes.obj_tbl () in
+ let sort_tbl = MetadataTypes.sort_tbl () in
+ let rel_tbl = MetadataTypes.rel_tbl () in
+ let name_tbl = MetadataTypes.name_tbl () in
+ let count_tbl = MetadataTypes.count_tbl () in
+ let tbls = [
+ (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ;
+ (name_tbl,`ObjectName) ; (count_tbl,`Count) ]
+ in
+ let statements =
+ (SqlStatements.create_tables tbls) @ (SqlStatements.create_indexes tbls)
+ in
+ List.iter (fun statement ->
+ try
+ ignore (Mysql.exec dbd statement)
+ with
+ exn ->
+ let status = Mysql.status dbd in
+ match status with
+ | Mysql.StatusError Mysql.Table_exists_error -> ()
+ | Mysql.StatusError _ -> raise exn
+ | _ -> ()
+ ) statements
+
+let drop_all dbd =
+ let obj_tbl = MetadataTypes.obj_tbl () in
+ let sort_tbl = MetadataTypes.sort_tbl () in
+ let rel_tbl = MetadataTypes.rel_tbl () in
+ let name_tbl = MetadataTypes.name_tbl () in
+ let count_tbl = MetadataTypes.count_tbl () in
+ let tbls = [
+ (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ;
+ (name_tbl,`ObjectName) ; (count_tbl,`Count) ]
+ in
+ let statements =
+ (SqlStatements.drop_tables tbls) @ (SqlStatements.drop_indexes tbls)
+ in
+ List.iter (fun statement ->
+ try
+ ignore (Mysql.exec dbd statement)
+ with Mysql.Error _ as exn ->
+ match Mysql.errno dbd with
+ | Mysql.Bad_table_error
+ | Mysql.No_such_index | Mysql.No_such_table -> ()
+ | _ -> raise exn
+ ) statements
+
+let slash_RE = Str.regexp "/"
+
+let partition l =
+ let l = List.fast_sort Pervasives.compare l in
+ let matches s1 s2 =
+ let l1,l2 = Str.split slash_RE s1, Str.split slash_RE s2 in
+ match l1,l2 with
+ | _::x::_,_::y::_ -> x = y
+ | _ -> false
+ in
+ let rec chunk l =
+ match l with
+ | [] -> [],[]
+ | h::(h1::tl as rest) when matches h h1 ->
+ let ch,todo = chunk rest in
+ (h::ch),todo
+ | h::(h1::tl as rest)-> [h],rest
+ | h::_ -> [h],[]
+ in
+ let rec split l =
+ let ch, todo = chunk l in
+ match todo with
+ | [] -> [ch]
+ | _ -> ch :: split todo
+ in
+ split l
+
+
+(* ARGV PARSING *)
+
+let _ =
+ try
+ if Sys.argv.(1) = "-h"||Sys.argv.(1) = "-help"||Sys.argv.(1) = "--help" then
+ begin
+ prerr_endline "
+usage: ./extractor_manager[.opt] [processes] [owner]
+
+defaults:
+ processes = 2
+ owner = NEW
+
+";
+ exit 1
+ end
+ with Invalid_argument _ -> ()
+
+let processes =
+ try
+ int_of_string (Sys.argv.(1))
+ with
+ Invalid_argument _ -> 2
+
+let owner =
+ try
+ Sys.argv.(2)
+ with Invalid_argument _ -> "NEW"
+
+let create_peons i =
+ let rec aux = function
+ | 0 -> []
+ | n -> (n,0) :: aux (n-1)
+ in
+ ref (aux i)
+
+let is_a_peon_idle peons =
+ List.exists (fun (_,x) -> x = 0) !peons
+
+let get_ide_peon peons =
+ let p = fst(List.find (fun (_,x) -> x = 0) !peons) in
+ peons := List.filter (fun (x,_) -> x <> p) !peons;
+ p
+
+let assign_peon peon pid peons =
+ peons := (peon,pid) :: !peons
+
+let wait_a_peon peons =
+ let pid,status = Unix.wait () in
+ (match status with
+ | Unix.WEXITED 0 -> ()
+ | _ -> prerr_endline (Printf.sprintf "PEON %d HAD A PROBLEM" pid));
+ let p = fst(List.find (fun (_,x) -> x = pid) !peons) in
+ peons := List.filter (fun (x,_) -> x <> p) !peons;
+ peons := (p,0) :: !peons
+
+let is_a_peon_busy peons =
+ List.exists (fun (_,x) -> x <> 0) !peons
+
+(* MAIN *)
+let main () =
+ Helm_registry.load_from "extractor.conf.xml";
+ Http_getter.init ();
+ print_endline "Updating the getter....";
+ Http_getter.update ();
+ let base = (Helm_registry.get "tmp.dir") ^ "/maps" in
+ let formats i =
+ (Helm_registry.get "tmp.dir") ^ "/"^(string_of_int i)^"/maps"
+ in
+ for i = 1 to processes do
+ let fmt = formats i in
+ ignore(Unix.system ("rm -rf " ^ fmt));
+ ignore(Unix.system ("mkdir -p " ^ fmt));
+ ignore(Unix.system ("cp -r " ^ base ^ " " ^ fmt ^ "/../"));
+ done;
+ let dbd =
+ Mysql.quick_connect
+ ~host:(Helm_registry.get "db.host")
+ ~user:(Helm_registry.get "db.user")
+ ~database:(Helm_registry.get "db.database") ()
+ in
+ MetadataTypes.ownerize_tables owner;
+ let uri_RE = Str.regexp ".*\\(ind\\|var\\|con\\)$" in
+ drop_all dbd;
+ create_all dbd;
+ let uris = Http_getter.getalluris () in
+ let uris = List.filter (fun u -> Str.string_match uri_RE u 0) uris in
+ let todo = partition uris in
+ let cur = ref 0 in
+ let tot = List.length todo in
+ let peons = create_peons processes in
+ print_string "START "; flush stdout;
+ ignore(Unix.system "date");
+ while !cur < tot do
+ if is_a_peon_idle peons then
+ let peon = get_ide_peon peons in
+ let fmt = formats peon in
+ let oc = open_out (fmt ^ "/../todo") in
+ List.iter (fun s -> output_string oc (s^"\n")) (List.nth todo !cur);
+ close_out oc;
+ let pid = Unix.fork () in
+ if pid = 0 then
+ Unix.execv
+ "./extractor.opt" [| "./extractor.opt" ; fmt ^ "/../" ; owner|]
+ else
+ begin
+ assign_peon peon pid peons;
+ incr cur
+ end
+ else
+ wait_a_peon peons
+ done;
+ while is_a_peon_busy peons do wait_a_peon peons done;
+ print_string "END "; flush stdout;
+ ignore(Unix.system "date")
+;;
+
+main ()