X-Git-Url: http://matita.cs.unibo.it/gitweb/?p=helm.git;a=blobdiff_plain;f=helm%2Focaml%2Flibrary%2FlibraryClean.ml;fp=helm%2Focaml%2Flibrary%2FlibraryClean.ml;h=d09769fc20628e95f2c9ff65ed24c6f62803d2ce;hp=0000000000000000000000000000000000000000;hb=792b5d29ebae8f917043d9dd226692919b5d6ca1;hpb=a14a8c7637fd0b95e9d4deccb20c6abc98e8f953 diff --git a/helm/ocaml/library/libraryClean.ml b/helm/ocaml/library/libraryClean.ml new file mode 100644 index 000000000..d09769fc2 --- /dev/null +++ b/helm/ocaml/library/libraryClean.ml @@ -0,0 +1,238 @@ +(* Copyright (C) 2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +(* $Id$ *) + +open Printf + +let debug = false +let debug_prerr = if debug then prerr_endline else ignore + +module HGT = Http_getter_types;; +module HG = Http_getter;; +module UM = UriManager;; + +let cache_of_processed_baseuri = Hashtbl.create 1024 + +let one_step_depend suri = + let buri = + try + UM.buri_of_uri (UM.uri_of_string suri) + with UM.IllFormedUri _ -> suri + in + if Hashtbl.mem cache_of_processed_baseuri buri then + [] + else + begin + Hashtbl.add cache_of_processed_baseuri buri true; + let query = + let buri = buri ^ "/" in + let buri = HMysql.escape buri in + let obj_tbl = MetadataTypes.obj_tbl () in + sprintf + ("SELECT source, h_occurrence FROM %s WHERE " ^^ + "h_occurrence REGEXP '^%s[^/]*$'") + obj_tbl buri + in + try + let rc = HMysql.exec (LibraryDb.instance ()) query in + let l = ref [] in + HMysql.iter rc ( + fun row -> + match row.(0), row.(1) with + | Some uri, Some occ when Filename.dirname occ = buri -> + l := uri :: !l + | _ -> ()); + let l = List.sort Pervasives.compare !l in + HExtlib.list_uniq l + with + exn -> raise exn (* no errors should be accepted *) + end + +let safe_buri_of_suri suri = + try + UM.buri_of_uri (UM.uri_of_string suri) + with + UM.IllFormedUri _ -> suri + +let close_uri_list uri_to_remove = + (* to remove an uri you have to remove the whole script *) + let buri_to_remove = + HExtlib.list_uniq + (List.fast_sort Pervasives.compare + (List.map safe_buri_of_suri uri_to_remove)) + in + (* cleand the already visided baseuris *) + let buri_to_remove = + List.filter + (fun buri -> + if Hashtbl.mem cache_of_processed_baseuri buri then false + else true) + buri_to_remove + in + (* now calculate the list of objects that belong to these baseuris *) + let uri_to_remove = + try + List.fold_left + (fun acc buri -> + let inhabitants = HG.ls (buri ^ "/") in + let inhabitants = List.filter + (function HGT.Ls_object _ -> true | _ -> false) + inhabitants + in + let inhabitants = List.map + (function + | HGT.Ls_object e -> buri ^ "/" ^ e.HGT.uri + | _ -> assert false) + inhabitants + in + inhabitants @ acc) + [] buri_to_remove + with HGT.Invalid_URI u -> + HLog.error ("We were listing an invalid buri: " ^ u); + exit 1 + in + (* now we want the list of all uri that depend on them *) + let depend = + List.fold_left + (fun acc u -> one_step_depend u @ acc) [] uri_to_remove + in + let depend = + HExtlib.list_uniq (List.fast_sort Pervasives.compare depend) + in + uri_to_remove, depend + +let rec close_db uris next = + match next with + | [] -> uris + | l -> let uris, next = close_uri_list l in close_db uris next @ uris + +let cleaned_no = ref 0;; + + (** TODO repellent code ... *) +let moo_root_dir = lazy ( + let url = + List.assoc "cic:/matita/" + (List.map + (fun pair -> + match + Str.split (Str.regexp "[ \t\r\n]+") (HExtlib.trim_blanks pair) + with + | [a;b] -> a, b + | _ -> assert false) + (Helm_registry.get_list Helm_registry.string "getter.prefix")) + in + String.sub url 7 (String.length url - 7) (* remove heading "file:///" *) +) + +let close_nodb ~basedir buris = + let rev_deps = Hashtbl.create 97 in + let all_metadata = + HExtlib.find ~test:(fun name -> Filename.check_suffix name ".metadata") + (Lazy.force moo_root_dir) + in + List.iter + (fun path -> + let metadata = LibraryNoDb.load_metadata ~fname:path in + let baseuri_of_current_metadata = + let dirname = Filename.dirname path in + let basedirlen = String.length basedir in + assert (String.sub dirname 0 basedirlen = basedir); + "cic:" ^ + String.sub dirname basedirlen (String.length dirname - basedirlen) ^ + Filename.basename path + in + let deps = + HExtlib.filter_map + (function LibraryNoDb.Dependency buri -> Some buri) + metadata + in + List.iter + (fun buri -> Hashtbl.add rev_deps buri baseuri_of_current_metadata) deps) + all_metadata; + let buris_to_remove = + HExtlib.list_uniq + (List.fast_sort Pervasives.compare + (List.flatten (List.map (Hashtbl.find_all rev_deps) buris))) + in + let objects_to_remove = + let objs_of_buri buri = + HExtlib.filter_map + (function + | Http_getter_types.Ls_object o -> + Some (buri ^ "/" ^ o.Http_getter_types.uri) + | _ -> None) + (Http_getter.ls buri) + in + List.flatten (List.map objs_of_buri (buris @ buris_to_remove)) + in + objects_to_remove + +let clean_baseuris ?(verbose=true) ~basedir buris = + Hashtbl.clear cache_of_processed_baseuri; + let buris = List.map Http_getter_misc.strip_trailing_slash buris in + debug_prerr "clean_baseuris called on:"; + if debug then + List.iter debug_prerr buris; + let l = + if Helm_registry.get_bool "db.nodb" then + close_nodb ~basedir buris + else + close_db [] buris + in + let l = HExtlib.list_uniq (List.fast_sort Pervasives.compare l) in + let l = List.map UriManager.uri_of_string l in + debug_prerr "clean_baseuri will remove:"; + if debug then + List.iter (fun u -> debug_prerr (UriManager.string_of_uri u)) l; + List.iter + (fun buri -> + HExtlib.safe_remove (LibraryMisc.obj_file_of_baseuri basedir buri); + HExtlib.safe_remove (LibraryMisc.metadata_file_of_baseuri basedir buri); + HExtlib.safe_remove (LibraryMisc.lexicon_file_of_baseuri basedir buri)) + (HExtlib.list_uniq (List.fast_sort Pervasives.compare + (List.map (UriManager.buri_of_uri) l))); + List.iter + (let last_baseuri = ref "" in + fun uri -> + let buri = UriManager.buri_of_uri uri in + if buri <> !last_baseuri then + begin + HLog.message ("Removing: " ^ buri ^ "/*"); + last_baseuri := buri + end; + LibrarySync.remove_obj uri + ) l; + cleaned_no := !cleaned_no + List.length l; + if !cleaned_no > 30 then + begin + cleaned_no := 0; + List.iter + (function table -> + ignore (HMysql.exec (LibraryDb.instance ()) ("OPTIMIZE TABLE " ^ table))) + [MetadataTypes.name_tbl (); MetadataTypes.rel_tbl (); + MetadataTypes.sort_tbl (); MetadataTypes.obj_tbl(); + MetadataTypes.count_tbl()] + end