From d90d1c964b365ece207f27d41f26e87a18719d59 Mon Sep 17 00:00:00 2001 From: Enrico Tassi Date: Wed, 4 May 2005 13:25:58 +0000 Subject: [PATCH] added extractor_manager --- helm/ocaml/metadata/extractor/Makefile | 20 +- helm/ocaml/metadata/extractor/extractor.ml | 123 +++++------ .../metadata/extractor/extractor_manager.ml | 195 ++++++++++++++++++ 3 files changed, 266 insertions(+), 72 deletions(-) create mode 100644 helm/ocaml/metadata/extractor/extractor_manager.ml diff --git a/helm/ocaml/metadata/extractor/Makefile b/helm/ocaml/metadata/extractor/Makefile index 1275aaab8..0eccebee4 100644 --- a/helm/ocaml/metadata/extractor/Makefile +++ b/helm/ocaml/metadata/extractor/Makefile @@ -4,10 +4,10 @@ INTERFACE_FILES = IMPLEMENTATION_FILES = $(INTERFACE_FILES:%.mli=%.ml) EXTRA_OBJECTS_TO_INSTALL = EXTRA_OBJECTS_TO_CLEAN = \ - extractor extractor.opt + extractor extractor.opt *.err *.out extractor_manager extractor_manager.opt -all: extractor -opt: extractor.opt +all: extractor extractor_manager +opt: extractor.opt extractor_manager.opt extractor: extractor.ml $(OCAMLFIND) ocamlc \ @@ -17,5 +17,19 @@ extractor.opt: extractor.ml $(OCAMLFIND) ocamlopt \ -thread -package mysql,helm-metadata -linkpkg -o $@ $< +extractor_manager: extractor_manager.ml + $(OCAMLFIND) ocamlc \ + -thread -package mysql,helm-metadata -linkpkg -o $@ $< + +extractor_manager.opt: extractor_manager.ml + $(OCAMLFIND) ocamlopt \ + -thread -package mysql,helm-metadata -linkpkg -o $@ $< + +export: extractor.opt extractor_manager.opt + nice -n 20 \ + time \ + ./extractor_manager.opt 1>export.out 2>export.err + + include ../../Makefile.common include .depend diff --git a/helm/ocaml/metadata/extractor/extractor.ml b/helm/ocaml/metadata/extractor/extractor.ml index f3c39d382..c1ade99ea 100644 --- a/helm/ocaml/metadata/extractor/extractor.ml +++ b/helm/ocaml/metadata/extractor/extractor.ml @@ -1,92 +1,77 @@ let _ = Helm_registry.load_from "extractor.conf.xml" -let _ = Unix.system ("mkdir -p " ^ (Helm_registry.get "tmp.dir")) -let _ = Http_getter.init () -(*let _ = Http_getter.update () *) -let dbd = - Mysql.quick_connect - ~host:(Helm_registry.get "db.host") - ~user:(Helm_registry.get "db.user") - ~database:(Helm_registry.get "db.database") () +let usage () = + prerr_endline " -let _ = +!! This binary should not be called by hand, use the extractor_manager. !! + +usage: ./extractor[.opt] path owner + +path: the path for the getter maps +owner: the owner of the tables to update + +" + +let _ = try - MetadataTypes.ownerize_tables Sys.argv.(1) - with Invalid_argument _ -> MetadataTypes.ownerize_tables "NEW" + let _ = Sys.argv.(2), Sys.argv.(1) in + if Sys.argv.(1) = "-h"||Sys.argv.(1) = "-help"||Sys.argv.(1) = "--help" then + begin + usage (); + exit 1 + end + with + Invalid_argument _ -> usage (); exit 1 -let uri_RE = Str.regexp ".*\\(ind\\|var\\|con\\)$" - -let create_all () = - let obj_tbl = MetadataTypes.obj_tbl () in - let sort_tbl = MetadataTypes.sort_tbl () in - let rel_tbl = MetadataTypes.rel_tbl () in - let name_tbl = MetadataTypes.name_tbl () in - let count_tbl = MetadataTypes.count_tbl () in - let tbls = [ - (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ; - (name_tbl,`ObjectName) ; (count_tbl,`Count) ] - in - let statements = - (SqlStatements.create_tables tbls) @ (SqlStatements.create_indexes tbls) - in - List.iter (fun statement -> - try - ignore (Mysql.exec dbd statement) - with - exn -> - let status = Mysql.status dbd in - match status with - | Mysql.StatusError Mysql.Table_exists_error -> () - | Mysql.StatusError _ -> raise exn - | _ -> () - ) statements +let owner = Sys.argv.(2) +let path = Sys.argv.(1) -let drop_all () = - let obj_tbl = MetadataTypes.obj_tbl () in - let sort_tbl = MetadataTypes.sort_tbl () in - let rel_tbl = MetadataTypes.rel_tbl () in - let name_tbl = MetadataTypes.name_tbl () in - let count_tbl = MetadataTypes.count_tbl () in - let tbls = [ - (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ; - (name_tbl,`ObjectName) ; (count_tbl,`Count) ] +let main () = + print_endline (Printf.sprintf "%d alive on path:%s owner:%s" + (Unix.getpid()) path owner); + Helm_registry.set "tmp.dir" path; + Http_getter.init (); + let dbd = + Mysql.quick_connect + ~host:(Helm_registry.get "db.host") + ~user:(Helm_registry.get "db.user") + ~database:(Helm_registry.get "db.database") () in - let statements = - (SqlStatements.drop_tables tbls) @ (SqlStatements.drop_indexes tbls) + MetadataTypes.ownerize_tables owner; + let uris = + let ic = open_in (path ^ "/todo") in + let acc = ref [] in + (try + while true do + let l = input_line ic in + acc := l :: !acc + done + with + End_of_file -> ()); + close_in ic; + !acc in - List.iter (fun statement -> - try - ignore (Mysql.exec dbd statement) - with Mysql.Error _ as exn -> - match Mysql.errno dbd with - | Mysql.Bad_table_error - | Mysql.No_such_index | Mysql.No_such_table -> () - | _ -> raise exn - ) statements - - -let main () = - drop_all (); - create_all (); - let uris = Http_getter.getalluris () in - let uris = List.filter (fun u -> Str.string_match uri_RE u 0) uris in let len = float_of_int (List.length uris) in let i = ref 0 in + let magic = 45 in List.iter (fun u -> incr i; let perc = ((float_of_int !i) /. len *. 100.0) in let l = String.length u in let short = - if l < 60 then - u ^ String.make (63 - l) ' ' + if l < magic then + u ^ String.make (magic + 3 - l) ' ' else - "..." ^ String.sub u (l - 60) 60 + "..." ^ String.sub u (l - magic) magic in - Printf.printf "\rIndexing (%3.1f%%): %s" perc short; + Printf.printf "%d (%d of %.0f = %3.1f%%): %s\n" + (Unix.getpid ()) !i len perc short; flush stdout; let uri = UriManager.uri_of_string u in MetadataDb.index_obj ~dbd ~uri) - uris + uris; + print_string "END "; Unix.system "date" ;; main () + diff --git a/helm/ocaml/metadata/extractor/extractor_manager.ml b/helm/ocaml/metadata/extractor/extractor_manager.ml new file mode 100644 index 000000000..493868d54 --- /dev/null +++ b/helm/ocaml/metadata/extractor/extractor_manager.ml @@ -0,0 +1,195 @@ +(* HELPERS *) +let create_all dbd = + let obj_tbl = MetadataTypes.obj_tbl () in + let sort_tbl = MetadataTypes.sort_tbl () in + let rel_tbl = MetadataTypes.rel_tbl () in + let name_tbl = MetadataTypes.name_tbl () in + let count_tbl = MetadataTypes.count_tbl () in + let tbls = [ + (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ; + (name_tbl,`ObjectName) ; (count_tbl,`Count) ] + in + let statements = + (SqlStatements.create_tables tbls) @ (SqlStatements.create_indexes tbls) + in + List.iter (fun statement -> + try + ignore (Mysql.exec dbd statement) + with + exn -> + let status = Mysql.status dbd in + match status with + | Mysql.StatusError Mysql.Table_exists_error -> () + | Mysql.StatusError _ -> raise exn + | _ -> () + ) statements + +let drop_all dbd = + let obj_tbl = MetadataTypes.obj_tbl () in + let sort_tbl = MetadataTypes.sort_tbl () in + let rel_tbl = MetadataTypes.rel_tbl () in + let name_tbl = MetadataTypes.name_tbl () in + let count_tbl = MetadataTypes.count_tbl () in + let tbls = [ + (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ; + (name_tbl,`ObjectName) ; (count_tbl,`Count) ] + in + let statements = + (SqlStatements.drop_tables tbls) @ (SqlStatements.drop_indexes tbls) + in + List.iter (fun statement -> + try + ignore (Mysql.exec dbd statement) + with Mysql.Error _ as exn -> + match Mysql.errno dbd with + | Mysql.Bad_table_error + | Mysql.No_such_index | Mysql.No_such_table -> () + | _ -> raise exn + ) statements + +let slash_RE = Str.regexp "/" + +let partition l = + let l = List.fast_sort Pervasives.compare l in + let matches s1 s2 = + let l1,l2 = Str.split slash_RE s1, Str.split slash_RE s2 in + match l1,l2 with + | _::x::_,_::y::_ -> x = y + | _ -> false + in + let rec chunk l = + match l with + | [] -> [],[] + | h::(h1::tl as rest) when matches h h1 -> + let ch,todo = chunk rest in + (h::ch),todo + | h::(h1::tl as rest)-> [h],rest + | h::_ -> [h],[] + in + let rec split l = + let ch, todo = chunk l in + match todo with + | [] -> [ch] + | _ -> ch :: split todo + in + split l + + +(* ARGV PARSING *) + +let _ = + try + if Sys.argv.(1) = "-h"||Sys.argv.(1) = "-help"||Sys.argv.(1) = "--help" then + begin + prerr_endline " +usage: ./extractor_manager[.opt] [processes] [owner] + +defaults: + processes = 2 + owner = NEW + +"; + exit 1 + end + with Invalid_argument _ -> () + +let processes = + try + int_of_string (Sys.argv.(1)) + with + Invalid_argument _ -> 2 + +let owner = + try + Sys.argv.(2) + with Invalid_argument _ -> "NEW" + +let create_peons i = + let rec aux = function + | 0 -> [] + | n -> (n,0) :: aux (n-1) + in + ref (aux i) + +let is_a_peon_idle peons = + List.exists (fun (_,x) -> x = 0) !peons + +let get_ide_peon peons = + let p = fst(List.find (fun (_,x) -> x = 0) !peons) in + peons := List.filter (fun (x,_) -> x <> p) !peons; + p + +let assign_peon peon pid peons = + peons := (peon,pid) :: !peons + +let wait_a_peon peons = + let pid,status = Unix.wait () in + (match status with + | Unix.WEXITED 0 -> () + | _ -> prerr_endline (Printf.sprintf "PEON %d HAD A PROBLEM" pid)); + let p = fst(List.find (fun (_,x) -> x = pid) !peons) in + peons := List.filter (fun (x,_) -> x <> p) !peons; + peons := (p,0) :: !peons + +let is_a_peon_busy peons = + List.exists (fun (_,x) -> x <> 0) !peons + +(* MAIN *) +let main () = + Helm_registry.load_from "extractor.conf.xml"; + Http_getter.init (); + print_endline "Updating the getter...."; + Http_getter.update (); + let base = (Helm_registry.get "tmp.dir") ^ "/maps" in + let formats i = + (Helm_registry.get "tmp.dir") ^ "/"^(string_of_int i)^"/maps" + in + for i = 1 to processes do + let fmt = formats i in + ignore(Unix.system ("rm -rf " ^ fmt)); + ignore(Unix.system ("mkdir -p " ^ fmt)); + ignore(Unix.system ("cp -r " ^ base ^ " " ^ fmt ^ "/../")); + done; + let dbd = + Mysql.quick_connect + ~host:(Helm_registry.get "db.host") + ~user:(Helm_registry.get "db.user") + ~database:(Helm_registry.get "db.database") () + in + MetadataTypes.ownerize_tables owner; + let uri_RE = Str.regexp ".*\\(ind\\|var\\|con\\)$" in + drop_all dbd; + create_all dbd; + let uris = Http_getter.getalluris () in + let uris = List.filter (fun u -> Str.string_match uri_RE u 0) uris in + let todo = partition uris in + let cur = ref 0 in + let tot = List.length todo in + let peons = create_peons processes in + print_string "START "; flush stdout; + ignore(Unix.system "date"); + while !cur < tot do + if is_a_peon_idle peons then + let peon = get_ide_peon peons in + let fmt = formats peon in + let oc = open_out (fmt ^ "/../todo") in + List.iter (fun s -> output_string oc (s^"\n")) (List.nth todo !cur); + close_out oc; + let pid = Unix.fork () in + if pid = 0 then + Unix.execv + "./extractor.opt" [| "./extractor.opt" ; fmt ^ "/../" ; owner|] + else + begin + assign_peon peon pid peons; + incr cur + end + else + wait_a_peon peons + done; + while is_a_peon_busy peons do wait_a_peon peons done; + print_string "END "; flush stdout; + ignore(Unix.system "date") +;; + +main () -- 2.39.2