+(* Copyright (C) 2004-2005, HELM Team.
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+(* HELPERS *)
+
+let create_all dbd =
+ let obj_tbl = MetadataTypes.obj_tbl () in
+ let sort_tbl = MetadataTypes.sort_tbl () in
+ let rel_tbl = MetadataTypes.rel_tbl () in
+ let name_tbl = MetadataTypes.name_tbl () in
+ let count_tbl = MetadataTypes.count_tbl () in
+ let tbls = [
+ (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ;
+ (name_tbl,`ObjectName) ; (count_tbl,`Count) ]
+ in
+ let statements =
+ (SqlStatements.create_tables tbls) @ (SqlStatements.create_indexes tbls)
+ in
+ List.iter (fun statement ->
+ try
+ ignore (Mysql.exec dbd statement)
+ with
+ exn ->
+ let status = Mysql.status dbd in
+ match status with
+ | Mysql.StatusError Mysql.Table_exists_error -> ()
+ | Mysql.StatusError _ -> raise exn
+ | _ -> ()
+ ) statements
+
+let drop_all dbd =
+ let obj_tbl = MetadataTypes.obj_tbl () in
+ let sort_tbl = MetadataTypes.sort_tbl () in
+ let rel_tbl = MetadataTypes.rel_tbl () in
+ let name_tbl = MetadataTypes.name_tbl () in
+ let count_tbl = MetadataTypes.count_tbl () in
+ let tbls = [
+ (obj_tbl,`RefObj) ; (sort_tbl,`RefSort) ; (rel_tbl,`RefRel) ;
+ (name_tbl,`ObjectName) ; (count_tbl,`Count) ]
+ in
+ let statements =
+ (SqlStatements.drop_tables tbls) @ (SqlStatements.drop_indexes tbls)
+ in
+ List.iter (fun statement ->
+ try
+ ignore (Mysql.exec dbd statement)
+ with Mysql.Error _ as exn ->
+ match Mysql.errno dbd with
+ | Mysql.Bad_table_error
+ | Mysql.No_such_index | Mysql.No_such_table -> ()
+ | _ -> raise exn
+ ) statements
+
+let slash_RE = Str.regexp "/"
+
+let partition l =
+ let l = List.fast_sort Pervasives.compare l in
+ let matches s1 s2 =
+ let l1,l2 = Str.split slash_RE s1, Str.split slash_RE s2 in
+ match l1,l2 with
+ | _::x::_,_::y::_ -> x = y
+ | _ -> false
+ in
+ let rec chunk l =
+ match l with
+ | [] -> [],[]
+ | h::(h1::tl as rest) when matches h h1 ->
+ let ch,todo = chunk rest in
+ (h::ch),todo
+ | h::(h1::tl as rest)-> [h],rest
+ | h::_ -> [h],[]
+ in
+ let rec split l =
+ let ch, todo = chunk l in
+ match todo with
+ | [] -> [ch]
+ | _ -> ch :: split todo
+ in
+ split l
+
+
+(* ARGV PARSING *)
+
+let _ =
+ try
+ if Sys.argv.(1) = "-h"||Sys.argv.(1) = "-help"||Sys.argv.(1) = "--help" then
+ begin
+ prerr_endline "
+usage: ./extractor_manager[.opt] [processes] [owner]
+
+defaults:
+ processes = 2
+ owner = NEW
+
+";
+ exit 1
+ end
+ with Invalid_argument _ -> ()
+
+let processes =
+ try
+ int_of_string (Sys.argv.(1))
+ with
+ Invalid_argument _ -> 2
+
+let owner =
+ try
+ Sys.argv.(2)
+ with Invalid_argument _ -> "NEW"
+
+let create_peons i =
+ let rec aux = function
+ | 0 -> []
+ | n -> (n,0) :: aux (n-1)
+ in
+ ref (aux i)
+
+let is_a_peon_idle peons =
+ List.exists (fun (_,x) -> x = 0) !peons
+
+let get_ide_peon peons =
+ let p = fst(List.find (fun (_,x) -> x = 0) !peons) in
+ peons := List.filter (fun (x,_) -> x <> p) !peons;
+ p
+
+let assign_peon peon pid peons =
+ peons := (peon,pid) :: !peons
+
+let wait_a_peon peons =
+ let pid,status = Unix.wait () in
+ (match status with
+ | Unix.WEXITED 0 -> ()
+ | Unix.WEXITED s ->
+ prerr_endline (Printf.sprintf "PEON %d EXIT STATUS %d" pid s)
+ | Unix.WSIGNALED s ->
+ prerr_endline
+ (Printf.sprintf "PEON %d HAD A PROBLEM, KILLED BY SIGNAL %d" pid s)
+ | Unix.WSTOPPED s ->
+ prerr_endline
+ (Printf.sprintf "PEON %d HAD A PROBLEM, STOPPED BY %d" pid s));
+ let p = fst(List.find (fun (_,x) -> x = pid) !peons) in
+ peons := List.filter (fun (x,_) -> x <> p) !peons;
+ peons := (p,0) :: !peons
+
+let is_a_peon_busy peons =
+ List.exists (fun (_,x) -> x <> 0) !peons
+
+(* MAIN *)
+let main () =
+ Helm_registry.load_from "extractor.conf.xml";
+ Http_getter.init ();
+ print_endline "Updating the getter....";
+ let base = (Helm_registry.get "tmp.dir") ^ "/maps" in
+ let formats i =
+ (Helm_registry.get "tmp.dir") ^ "/"^(string_of_int i)^"/maps"
+ in
+ for i = 1 to processes do
+ let fmt = formats i in
+ ignore(Unix.system ("rm -rf " ^ fmt));
+ ignore(Unix.system ("mkdir -p " ^ fmt));
+ ignore(Unix.system ("cp -r " ^ base ^ " " ^ fmt ^ "/../"));
+ done;
+ let dbd =
+ Mysql.quick_connect
+ ~host:(Helm_registry.get "db.host")
+ ~user:(Helm_registry.get "db.user")
+ ~database:(Helm_registry.get "db.database") ()
+ in
+ MetadataTypes.ownerize_tables owner;
+ let uri_RE = Str.regexp ".*\\(ind\\|var\\|con\\)$" in
+ drop_all dbd;
+ create_all dbd;
+ let uris = Http_getter.getalluris () in
+ let uris = List.filter (fun u -> Str.string_match uri_RE u 0) uris in
+ let todo = partition uris in
+ let cur = ref 0 in
+ let tot = List.length todo in
+ let peons = create_peons processes in
+ print_string "START "; flush stdout;
+ ignore(Unix.system "date");
+ while !cur < tot do
+ if is_a_peon_idle peons then
+ let peon = get_ide_peon peons in
+ let fmt = formats peon in
+ let oc = open_out (fmt ^ "/../todo") in
+ List.iter (fun s -> output_string oc (s^"\n")) (List.nth todo !cur);
+ close_out oc;
+ let pid = Unix.fork () in
+ if pid = 0 then
+ Unix.execv
+ "./extractor.opt" [| "./extractor.opt" ; fmt ^ "/../" ; owner|]
+ else
+ begin
+ assign_peon peon pid peons;
+ incr cur
+ end
+ else
+ wait_a_peon peons
+ done;
+ while is_a_peon_busy peons do wait_a_peon peons done;
+ print_string "END "; flush stdout;
+ ignore(Unix.system "date");
+ (* and now the rename table stuff *)
+ let obj_tbl = MetadataTypes.library_obj_tbl in
+ let sort_tbl = MetadataTypes.library_sort_tbl in
+ let rel_tbl = MetadataTypes.library_rel_tbl in
+ let name_tbl = MetadataTypes.library_name_tbl in
+ let count_tbl = MetadataTypes.library_count_tbl in
+ let hits_tbl = MetadataTypes.library_hits_tbl in
+ let obj_tbl_b = obj_tbl ^ "_BACKUP" in
+ let sort_tbl_b = sort_tbl ^ "_BACKUP" in
+ let rel_tbl_b = rel_tbl ^ "_BACKUP" in
+ let name_tbl_b = name_tbl ^ "_BACKUP" in
+ let count_tbl_b = count_tbl ^ "_BACKUP" in
+ let obj_tbl_c = MetadataTypes.obj_tbl () in
+ let sort_tbl_c = MetadataTypes.sort_tbl () in
+ let rel_tbl_c = MetadataTypes.rel_tbl () in
+ let name_tbl_c = MetadataTypes.name_tbl () in
+ let count_tbl_c = MetadataTypes.count_tbl () in
+ let stats =
+ SqlStatements.drop_tables [
+ (obj_tbl_b,`RefObj);
+ (sort_tbl_b,`RefSort);
+ (rel_tbl_b,`RefRel);
+ (name_tbl_b,`ObjectName);
+ (count_tbl_b,`Count);
+ (hits_tbl,`Hits) ] @
+ SqlStatements.drop_indexes [
+ (obj_tbl,`RefObj);
+ (sort_tbl,`RefSort);
+ (rel_tbl,`RefRel);
+ (name_tbl,`ObjectName);
+ (count_tbl,`Count);
+ (obj_tbl_c,`RefObj);
+ (sort_tbl_c,`RefSort);
+ (rel_tbl_c,`RefRel);
+ (name_tbl_c,`ObjectName);
+ (count_tbl_c,`Count);
+ (hits_tbl,`Hits) ] @
+ SqlStatements.rename_tables [
+ (obj_tbl,obj_tbl_b);
+ (sort_tbl,sort_tbl_b);
+ (rel_tbl,rel_tbl_b);
+ (name_tbl,name_tbl_b);
+ (count_tbl,count_tbl_b) ] @
+ SqlStatements.rename_tables [
+ (obj_tbl_c,obj_tbl);
+ (sort_tbl_c,sort_tbl);
+ (rel_tbl_c,rel_tbl);
+ (name_tbl_c,name_tbl);
+ (count_tbl_c,count_tbl) ] @
+ SqlStatements.create_tables [
+ (hits_tbl,`Hits) ] @
+ SqlStatements.fill_hits obj_tbl hits_tbl @
+ SqlStatements.create_indexes [
+ (obj_tbl,`RefObj);
+ (sort_tbl,`RefSort);
+ (rel_tbl,`RefRel);
+ (name_tbl,`ObjectName);
+ (count_tbl,`Count);
+ (hits_tbl,`Hits) ]
+ in
+ List.iter (fun statement ->
+ try
+(* prerr_endline statement;*)
+ ignore (Mysql.exec dbd statement)
+ with exn ->
+ let status = Mysql.status dbd in
+ match status with
+ | Mysql.StatusError Mysql.Table_exists_error
+ | Mysql.StatusError Mysql.Bad_table_error
+ | Mysql.StatusError Mysql.Cant_drop_field_or_key
+ | Mysql.StatusError Mysql.Unknown_table -> ()
+ | Mysql.StatusError status ->
+(* prerr_endline (string_of_int (Obj.magic status));*)
+ prerr_endline (Printexc.to_string exn);
+ raise exn
+ | _ ->
+ prerr_endline (Printexc.to_string exn);
+ ())
+ stats
+;;
+
+main ()