1 (* Copyright (C) 2000, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://www.cs.unibo.it/helm/.
26 (* AUTOR: Ferruccio Guidi <fguidi@cs.unibo.it>
31 module X = HxpXML.Make
33 let build = ref "EXPORT"
35 let lib = ref Filename.current_dir_name
37 let uq s = String.sub s 1 (String.length s - 2)
41 let i = String.index s '\'' in
42 let l = String.length s in
43 String.sub s 0 i ^ "\\\'" ^ esc (String.sub s (succ i) (l - succ i))
46 let split_filename f =
47 let len = String.length f in
48 let dot = String.rindex f '.' in
49 let slash = String.rindex f '/' in
50 let path = String.sub f 0 slash in
51 let name = String.sub f (succ slash) (dot - succ slash) in
52 let ext = String.sub f (succ dot) (len - succ dot) in
55 let ins0 name filter =
56 let sort = ref "\"\"" in
57 let relative = ref 0 in
60 let flags = String.create 3 in
61 let set_flags_for = function
62 | T.XML_Open "LAMBDA" -> flags.[0] <- 'L'
63 | T.XML_Open "MUTCASE" -> flags.[1] <- 'C'
64 | T.XML_Open "FIX" -> flags.[2] <- 'F'
65 | T.XML_Open "COFIX" -> flags.[2] <- 'F'
68 let rec skip_premises h =
70 | h, (T.XML_Attribute _, _) -> skip_premises h
71 | h, (T.XML_End, _) -> skip_premises h
72 | h, (T.XML_Open "PROD", i) ->
74 if ! sort = "\"\"" then
76 | h, (T.XML_Attribute ("type", str), _) -> sort := str; h
77 | h, _ -> sort := ""; h
80 let h = X.scan_for h (T.XML_Close "source", succ i) in
81 let h, (obj, i) = X.xnext h in
82 relative := i + 2; (* XML_Open "target" *)
85 print_string (! sort ^ " " ^ T.string_of_object obj);
90 | h, (T.XML_Done, _) -> h
91 | h, (T.XML_Attribute ("uri", v), i) ->
93 if i > ! max then max := i;
96 set_flags_for obj; ins_aux h
98 if snd (snd (split_filename name)) = "con" then begin
99 flags.[0] <- '-'; flags.[1] <- '-'; flags.[2] <- '-';
100 let h = X.start (Filename.concat ! lib (name ^ ".xml")) filter in
101 let h = X.scan_for h (T.XML_Open "type", 3) in
102 X.stop (ins_aux (skip_premises h));
103 print_endline (" \"cic:" ^ name ^ "\" " ^ flags ^
104 " count " ^ string_of_int ! count ^
105 " max_depth " ^ string_of_int (! max - ! relative) ^
110 type style_t = HTML | PG | RDF
112 let ins1 style name filter =
114 let minor = ref (-1) in
116 let make_ref pre sep post =
118 name ^ pre ^ string_of_int ! maior ^
119 (if ! minor > 0 then sep ^ string_of_int ! minor else "") ^ post
123 (make_ref "#xpointer(1/" "/" ")", make_ref "," "," "")
128 print_endline ("\"" ^ n ^ "\" alias of \"cic:" ^
129 fst (make_uris ()) ^ "\" <p>")
131 print_endline ("cic:/" ^ fst (make_uris ()) ^ "\t" ^ n)
133 let uris = make_uris () in
134 let dir = ! build ^ fst (split_filename name) in
135 let rc = Sys.command ("mkdir -p " ^ esc dir) in
136 let och = open_out (! build ^ snd uris ^ ".xml") in
137 output_string och "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\n";
138 output_string och "<rdf:RDF xml:lang=\"en\"\n";
139 output_string och " xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n";
140 output_string och " xmlns:h=\"http://www.cs.unibo.it/helm/schemas/mattone.rdf#\">\n";
141 output_string och (" <h:Object rdf:about=\"cic:" ^ fst uris ^ "\">\n");
142 output_string och (" <h:name>" ^ n ^ "</h:name>\n");
143 output_string och " </h:Object>\n";
144 output_string och "</rdf:RDF>\n";
146 (* print_endline ("\"" ^ fst uris ^ "\" alias \"" ^ n ^ "\"")
150 | h, (T.XML_Done, _) -> h
151 | h, (T.XML_Attribute ("name", n), i) ->
152 if i = 4 then begin incr maior; minor := -1; out_alias (uq n) end else
153 if i = 5 then out_alias (uq n);
157 let split = snd (split_filename name) in
158 if snd split <> "ind" then out_alias (fst split) else
159 let h = X.start (Filename.concat ! lib (name ^ ".xml")) filter in
163 let test s name filter =
164 if s = "con-info" then ins0 name filter else
165 if s = "html-names" then ins1 HTML name filter else
166 if s = "pg-names" then ins1 PG name filter else
167 if s = "rdf-names" then ins1 RDF name filter else ()
169 let read_index_txt () =
171 let ich = open_in (Filename.concat ! lib "index.txt") in
173 let line = input_line ich in
174 let p0 = succ (String.index line '/') in
175 let p1 = succ (String.rindex line '.') in
176 let uri = String.sub line p0 (p1 - p0) in
177 let ext = String.sub line p1 (String.length line - p1) in
179 | "con gz" -> uris := (uri ^ "con", true) :: ! uris; aux ()
180 | "ind gz" -> uris := (uri ^ "ind", true) :: ! uris; aux ()
181 | "var gz" -> uris := (uri ^ "var", true) :: ! uris; aux ()
182 | "con" -> uris := (uri ^ "con", false) :: ! uris; aux ()
183 | "ind" -> uris := (uri ^ "ind", false) :: ! uris; aux ()
184 | "var" -> uris := (uri ^ "var", false) :: ! uris; aux ()
187 begin try aux () with End_of_file -> close_in ich end; ! uris
190 let l = read_index_txt () in
191 let rec loop_on = function
193 | (uri, filter) :: tail -> test s uri filter; loop_on tail
195 prerr_endline ("total time: " ^ string_of_float (Sys.time ()) ^ " seconds")
198 let lexbuf = Lexing.from_channel stdin in
200 let cont = ref true in
202 let data = Parser.tm Lexer.tmt lexbuf in
203 if fst data > 0. then begin
204 let d = (fst data, snd data, snd data /. fst data) in
205 tm := d :: ! tm end else
206 if fst data < 0. then cont := false else
207 print_endline ("[" ^ string_of_int (1 + List.length ! tm) ^ "] ");
208 done; print_newline (); ! tm
210 let compare1 (r1, t1, p1) (r2, t2, p2) =
211 if r1 > r2 then 1 else
212 if r1 < r2 then -1 else
213 if p1 > p2 then 1 else
214 if p1 < p2 then -1 else 0
216 let rec count min max = function
218 | (r, _, _) :: tail ->
219 let p = count min max tail in
220 if r >= min && r <= max then succ p else p
225 let rec mean_aux = function
226 | [] -> ! sum /. ! num
227 | (r, _, p) :: tail ->
228 if r >= min && r <= max then begin
229 num := ! num +. 1.; sum := ! sum +. p; mean_aux tail end
233 let variance min max m l =
236 let rec variance_aux = function
237 | [] -> ! sum /. ! num
238 | (r, _, p) :: tail ->
239 if r >= min && r <= max then begin
240 num := ! num +. 1.; sum := ! sum +. (p -. m) *. (p -. m);
241 variance_aux tail end
242 else variance_aux tail
243 in sqrt (variance_aux l)
245 let read_timing min max =
246 let l = List.sort compare1 (get_timing ()) in
247 let c = count min max l in
248 let m = mean min max l in
249 let v = variance min max m l in
251 print_endline (string_of_int c ^ " " ^ string_of_float m ^ " " ^
256 let rec diff_aux = function
257 | ((r1, q1, p1) :: t1, (r2, q2, p2) :: t2) when r1 = r2 ->
258 (r1, q1 -. q2, p1 -. p2) :: diff_aux (t1, t2)
259 | ([], []) -> print_newline (); []
260 | (_ :: t1, _ :: t2) ->
261 print_string ("[" ^ string_of_int (1 + List.length t1) ^ "] ");
265 let l1 = get_timing () in
266 let l2 = get_timing () in
267 print_string (string_of_int (List.length l1) ^ " ");
268 print_string (string_of_int (List.length l2) ^ "\n");
271 let comp_timing min max =
272 let l = List.sort compare1 (diff_timing ()) in
273 let c = count min max l in
274 let m = mean min max l in
275 let v = variance min max m l in
277 print_endline (string_of_int c ^ " " ^ string_of_float m ^ " " ^
282 let rec print_aux () =
283 let xobj = Xml.xnext() in
284 if fst xobj = XML_Done then () else
285 begin print_endline (string_of_xobject xobj); print_aux () end
286 in Xml.start file false; print_aux (); Xml.stop ()
289 let mbytes = ref 0.0 in
291 let k = 1024.0 ** float (int_of_string s) in
292 let rec count_bytes_aux () =
294 let s = read_line () in
295 (* prerr_endline ("*" ^ s ^ "*"); *)
296 let j = String.rindex s ' ' in
298 try succ (String.rindex_from s (pred j) ' ')
299 with Not_found -> 0 in
300 (* prerr_endline ("*" ^ String.sub s i (j - i) ^ "*"); *)
301 let b = int_of_string (String.sub s i (j - i)) in
302 mbytes := ! mbytes +. float b /. k; incr num;
304 with End_of_file -> ()
307 prerr_endline (string_of_int ! num ^ " " ^ string_of_float ! mbytes)
311 let usage = "Usage: hxp [-lx <dir>] [-p <action>]" in
313 let _l = " set the path to the index.txt file (default is .)" in
314 let _x = " set the path to the RDF export directory (default is EXPORT)" in
315 let _r = " read the XML files extracting the specified information
316 <action>: con-info produce information about .con files in HTML format
317 html-names produce short names' list in HTML format
318 pg-names produce short names' list in Postgres input format
319 rdf-names produce RDF files with short names' metadata"
323 ("-l", Arg.String (fun dir -> lib := dir), _l);
324 ("-x", Arg.String (fun dir -> build := dir), _x);
325 ("-r", Arg.String read_xml, _r)
330 let rec parse = function
332 | ("-l"|"-library") :: dir :: rem -> lib := dir; parse rem
333 | ("-x"|"-export") :: dir :: rem -> build := dir; parse rem
334 | ("-s"|"-stat") :: min :: max :: rem ->
335 read_timing (float_of_string min) (float_of_string max); parse rem
336 | ("-c"|"-comp") :: min :: max :: rem ->
337 comp_timing (float_of_string min) (float_of_string max); parse rem
338 | ("-i"|"-ins") :: s :: file :: rem -> test s file false; parse rem
339 | ("-t"|"-text") :: s :: rem -> read_xml s false; parse rem
340 | ("-z"|"-gzip") :: s :: rem -> read_xml s true; parse rem
341 | ("-b"|"-bytes") :: s :: rem -> count_bytes s; parse rem
342 | _ :: rem -> parse rem
344 parse (List.tl (Array.to_list Sys.argv))