From: Ferruccio Guidi Date: Mon, 6 Oct 2003 17:25:23 +0000 (+0000) Subject: hxp: performes several tasks among which some metadata extraction X-Git-Tag: V_0_5_1_1~5 X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=commitdiff_plain;h=399f84005987d007bd24f8a0dea3bb2211070a18;p=helm.git hxp: performes several tasks among which some metadata extraction --- diff --git a/helm/hxp/.cvsignore b/helm/hxp/.cvsignore new file mode 100644 index 000000000..bc0401ddc --- /dev/null +++ b/helm/hxp/.cvsignore @@ -0,0 +1,2 @@ +*.cm[aiox] *.cmxa *.output *.opt hxp +hxpParser.ml hxpParser.mli hxpLexer.ml diff --git a/helm/hxp/.depend b/helm/hxp/.depend new file mode 100644 index 000000000..7f855a44b --- /dev/null +++ b/helm/hxp/.depend @@ -0,0 +1,9 @@ +hxpTop.cmo: hxpTypes.cmo hxpXML.cmo +hxpTop.cmx: hxpTypes.cmx hxpXML.cmx +hxpParser.cmo: hxpTypes.cmo hxpParser.cmi +hxpParser.cmx: hxpTypes.cmx hxpParser.cmi +hxpParser.cmi: hxpTypes.cmo +hxpLexer.cmo: hxpParser.cmi +hxpLexer.cmx: hxpParser.cmx +hxpXML.cmo: hxpLexer.cmo hxpParser.cmi hxpTypes.cmo +hxpXML.cmx: hxpLexer.cmx hxpParser.cmx hxpTypes.cmx diff --git a/helm/hxp/EXAMPLE.txt b/helm/hxp/EXAMPLE.txt new file mode 100644 index 000000000..83e02a5cb --- /dev/null +++ b/helm/hxp/EXAMPLE.txt @@ -0,0 +1,5 @@ +On mowgli.cs.unibo.it this line outputs the names' list in a format suitable for +Postgres. Each line of this list contains a uri and its short name separated by +a tab character (as produced by pg_dump). see hxp --help for details. + +hxp.opt -l /projects/helm/EXPORT/examples_mowgli/objects -r pg-names diff --git a/helm/hxp/Makefile b/helm/hxp/Makefile new file mode 100644 index 000000000..4c41e6598 --- /dev/null +++ b/helm/hxp/Makefile @@ -0,0 +1,66 @@ +BIN_DIR = /usr/local/bin +REQUIRES = zip +# unix helm-cic_textual_parser \ +# helm-mathql helm-mathql_interpreter helm-mathql_generator +PREDICATES = +OCAMLOPTIONS = -package "$(REQUIRES)" -predicates "$(PREDICATES)" +OCAMLC = ocamlfind ocamlc $(OCAMLOPTIONS) +OCAMLOPT = ocamlfind ocamlopt $(OCAMLOPTIONS) +OCAMLDEP = ocamldep +OCAMLYACC = ocamlyacc +OCAMLLEX = ocamllex + +LIBRARIES = $(shell ocamlfind query -recursive -predicates "byte $(PREDICATES)" -format "%d/%a" $(REQUIRES)) +LIBRARIES_OPT = $(shell ocamlfind query -recursive -predicates "native $(PREDICATES)" -format "%d/%a" $(REQUIRES)) + +HXPTOP = hxpTop.ml + +DEPOBJS = $(HXPTOP) +AUXOBJS = hxpTypes.ml hxpParser.ml hxpParser.mli hxpLexer.ml hxpXML.ml + +all: hxp +opt: hxp.opt + +depend: $(AUXOBJS) + $(OCAMLDEP) $(DEPOBJS) $(AUXOBJS) > .depend + +hxp: hxpParser.cmi $(AUXOBJS:.ml=.cmo) $(HXPTOP:.ml=.cmo) $(LIBRARIES) + $(OCAMLC) -linkpkg -o hxp $(AUXOBJS:.ml=.cmo) $(HXPTOP:.ml=.cmo) + +hxp.opt: $(AUXOBJS:.ml=.cmx) $(HXPTOP:.ml=.cmx) $(LIBRARIES_OPT) + $(OCAMLOPT) -linkpkg -o hxp.opt $(AUXOBJS:.ml=.cmx) $(HXPTOP:.ml=.cmx) + +.SUFFIXES: .ml .mli .cmo .cmi .cmx .mly .mll +.ml.cmo: $(LIBRARIES) + $(OCAMLC) -c $< +.mli.cmi: $(LIBRARIES) + $(OCAMLC) -c $< +.ml.cmx: $(LIBRARIES_OPT) + $(OCAMLOPT) -c $< +.mly.ml: + $(OCAMLYACC) $< +.mly.mli: + $(OCAMLYACC) $< +.mll.ml: + $(OCAMLLEX) $< + +$(DEPOBJS:%.ml=%.cmo): $(LIBRARIES) +$(DEPOBJS:%.ml=%.cmx): $(LIBRARIES_OPT) + +clean: + rm -f *.cm[iox] *.o hxp hxp.opt \ + hxpParser.ml hxpParser.mli hxpLexer.ml + +install: + cp hxp hxp.opt $(BIN_DIR) + +uninstall: + cd $(BIN_DIR) + rm -f hxp hxp.opt + +.PHONY: install uninstall clean + +ifneq ($(MAKECMDGOALS), depend) + include .depend +endif + diff --git a/helm/hxp/hxpLexer.mll b/helm/hxp/hxpLexer.mll new file mode 100644 index 000000000..fbec4aca7 --- /dev/null +++ b/helm/hxp/hxpLexer.mll @@ -0,0 +1,56 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://www.cs.unibo.it/helm/. + *) + +(* AUTOR: Ferruccio Guidi + *) + +{ + open HxpParser +} + +let SPC = [' ' '\t' '\n' '\r' '?' '!']+ +let ALPHA = ['A'-'Z' 'a'-'z']+ +let STR = '"'[^'"']*'"' +let NUM = ['0'-'9' '.']+ +let BEF = [^ '=' '0'-'9' '.' ':' '*']* + +rule xml_token = parse + | SPC { xml_token lexbuf } + | '<' { OPEN } + | '>' { CLOSE } + | '=' { EQ } + | '/' { SL } + | ALPHA { NAME (Lexing.lexeme lexbuf) } + | STR { VAL (Lexing.lexeme lexbuf) } + | eof { DONE } + +and time_token = parse + | BEF { time_token lexbuf } + | NUM { DATA (float_of_string (Lexing.lexeme lexbuf)) } + | '=' { EQ } + | ':' { CLN } + | _ { DONE } + | eof { DONE } + diff --git a/helm/hxp/hxpParser.mly b/helm/hxp/hxpParser.mly new file mode 100644 index 000000000..aed627abf --- /dev/null +++ b/helm/hxp/hxpParser.mly @@ -0,0 +1,56 @@ +/* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + */ + +/* AUTOR: Ferruccio Guidi + */ + +%{ + module T = HxpTypes +%} + %token DONE + %token NAME VAL + %token OPEN CLOSE EQ SL /* < > = / */ + %start xml + %type xml + + %token DATA + %token CLN + %start time + %type time +%% + xml : + OPEN NAME { T.XML_Open $2 } + | NAME EQ VAL { T.XML_Attribute ($1, $3) } + | CLOSE { T.XML_End } + | OPEN SL NAME CLOSE { T.XML_Close $3 } + | SL CLOSE { T.XML_Close "" } + | NAME NAME VAL { T.XML_Attribute ($1 ^ " " ^ $2, $3) } + | DONE { T.XML_Done } + ; + time : + EQ DATA CLN DATA { ($2, $4) } + | DATA EQ DATA CLN DATA { ($3, $5) } + | DONE { (-1., -1.) } + ; diff --git a/helm/hxp/hxpTop.ml b/helm/hxp/hxpTop.ml new file mode 100644 index 000000000..1030c6447 --- /dev/null +++ b/helm/hxp/hxpTop.ml @@ -0,0 +1,345 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://www.cs.unibo.it/helm/. + *) + +(* AUTOR: Ferruccio Guidi + *) + +module T = HxpTypes + +module X = HxpXML.Make + +let build = ref "EXPORT" + +let lib = ref Filename.current_dir_name + +let uq s = String.sub s 1 (String.length s - 2) + +let rec esc s = + try + let i = String.index s '\'' in + let l = String.length s in + String.sub s 0 i ^ "\\\'" ^ esc (String.sub s (succ i) (l - succ i)) + with Not_found -> s + +let split_filename f = + let len = String.length f in + let dot = String.rindex f '.' in + let slash = String.rindex f '/' in + let path = String.sub f 0 slash in + let name = String.sub f (succ slash) (dot - succ slash) in + let ext = String.sub f (succ dot) (len - succ dot) in + (path, (name, ext)) + +let ins0 name filter = + let sort = ref "\"\"" in + let relative = ref 0 in + let count = ref 0 in + let max = ref 0 in + let flags = String.create 3 in + let set_flags_for = function + | T.XML_Open "LAMBDA" -> flags.[0] <- 'L' + | T.XML_Open "MUTCASE" -> flags.[1] <- 'C' + | T.XML_Open "FIX" -> flags.[2] <- 'F' + | T.XML_Open "COFIX" -> flags.[2] <- 'F' + | _ -> () + in + let rec skip_premises h = + match X.xnext h with + | h, (T.XML_Attribute _, _) -> skip_premises h + | h, (T.XML_End, _) -> skip_premises h + | h, (T.XML_Open "PROD", i) -> + let h = + if ! sort = "\"\"" then + match X.xnext h with + | h, (T.XML_Attribute ("type", str), _) -> sort := str; h + | h, _ -> sort := ""; h + else h + in + let h = X.scan_for h (T.XML_Close "source", succ i) in + let h, (obj, i) = X.xnext h in + relative := i + 2; (* XML_Open "target" *) + skip_premises h + | h, (obj, _) -> + print_string (! sort ^ " " ^ T.string_of_object obj); + set_flags_for obj; h + in + let rec ins_aux h = + match X.xnext h with + | h, (T.XML_Done, _) -> h + | h, (T.XML_Attribute ("uri", v), i) -> + incr count; + if i > ! max then max := i; + ins_aux h + | h, (obj, _) -> + set_flags_for obj; ins_aux h + in + if snd (snd (split_filename name)) = "con" then begin + flags.[0] <- '-'; flags.[1] <- '-'; flags.[2] <- '-'; + let h = X.start (Filename.concat ! lib (name ^ ".xml")) filter in + let h = X.scan_for h (T.XML_Open "type", 3) in + X.stop (ins_aux (skip_premises h)); + print_endline (" \"cic:" ^ name ^ "\" " ^ flags ^ + " count " ^ string_of_int ! count ^ + " max_depth " ^ string_of_int (! max - ! relative) ^ + "

"); + flush stdout + end + +type style_t = HTML | PG | RDF + +let ins1 style name filter = + let maior = ref 0 in + let minor = ref (-1) in + let make_uris () = + let make_ref pre sep post = + if ! maior > 0 then + name ^ pre ^ string_of_int ! maior ^ + (if ! minor > 0 then sep ^ string_of_int ! minor else "") ^ post + else name + in + incr minor; + (make_ref "#xpointer(1/" "/" ")", make_ref "," "," "") + in + let out_alias n = + match style with + | HTML -> + print_endline ("\"" ^ n ^ "\" alias of \"cic:" ^ + fst (make_uris ()) ^ "\"

") + | PG -> + print_endline ("cic:/" ^ fst (make_uris ()) ^ "\t" ^ n) + | RDF -> + let uris = make_uris () in + let dir = ! build ^ fst (split_filename name) in + let rc = Sys.command ("mkdir -p " ^ esc dir) in + let och = open_out (! build ^ snd uris ^ ".xml") in + output_string och "\n\n"; + output_string och "\n"; + output_string och (" \n"); + output_string och (" " ^ n ^ "\n"); + output_string och " \n"; + output_string och "\n"; + close_out och; +(* print_endline ("\"" ^ fst uris ^ "\" alias \"" ^ n ^ "\"") +*) in + let rec ins_aux h = + match X.xnext h with + | h, (T.XML_Done, _) -> h + | h, (T.XML_Attribute ("name", n), i) -> + if i = 4 then begin incr maior; minor := -1; out_alias (uq n) end else + if i = 5 then out_alias (uq n); + ins_aux h + | h, _ -> ins_aux h + in + let split = snd (split_filename name) in + if snd split <> "ind" then out_alias (fst split) else + let h = X.start (Filename.concat ! lib (name ^ ".xml")) filter in + X.stop (ins_aux h); + flush stdout + +let test s name filter = + if s = "con-info" then ins0 name filter else + if s = "html-names" then ins1 HTML name filter else + if s = "pg-names" then ins1 PG name filter else + if s = "rdf-names" then ins1 RDF name filter else () + +let read_index_txt () = + let uris = ref [] in + let ich = open_in (Filename.concat ! lib "index.txt") in + let rec aux () = + let line = input_line ich in + let p0 = succ (String.index line '/') in + let p1 = succ (String.rindex line '.') in + let uri = String.sub line p0 (p1 - p0) in + let ext = String.sub line p1 (String.length line - p1) in + match ext with + | "con gz" -> uris := (uri ^ "con", true) :: ! uris; aux () + | "ind gz" -> uris := (uri ^ "ind", true) :: ! uris; aux () + | "var gz" -> uris := (uri ^ "var", true) :: ! uris; aux () + | "con" -> uris := (uri ^ "con", false) :: ! uris; aux () + | "ind" -> uris := (uri ^ "ind", false) :: ! uris; aux () + | "var" -> uris := (uri ^ "var", false) :: ! uris; aux () + | _ -> aux () + in + begin try aux () with End_of_file -> close_in ich end; ! uris + +let read_xml s = + let l = read_index_txt () in + let rec loop_on = function + | [] -> () + | (uri, filter) :: tail -> test s uri filter; loop_on tail + in loop_on l; + prerr_endline ("total time: " ^ string_of_float (Sys.time ()) ^ " seconds") +(* +let get_timing () = + let lexbuf = Lexing.from_channel stdin in + let tm = ref [] in + let cont = ref true in + while ! cont do + let data = Parser.tm Lexer.tmt lexbuf in + if fst data > 0. then begin + let d = (fst data, snd data, snd data /. fst data) in + tm := d :: ! tm end else + if fst data < 0. then cont := false else + print_endline ("[" ^ string_of_int (1 + List.length ! tm) ^ "] "); + done; print_newline (); ! tm + +let compare1 (r1, t1, p1) (r2, t2, p2) = + if r1 > r2 then 1 else + if r1 < r2 then -1 else + if p1 > p2 then 1 else + if p1 < p2 then -1 else 0 + +let rec count min max = function + | [] -> 0 + | (r, _, _) :: tail -> + let p = count min max tail in + if r >= min && r <= max then succ p else p + +let mean min max l = + let num = ref 0. in + let sum = ref 0. in + let rec mean_aux = function + | [] -> ! sum /. ! num + | (r, _, p) :: tail -> + if r >= min && r <= max then begin + num := ! num +. 1.; sum := ! sum +. p; mean_aux tail end + else mean_aux tail + in mean_aux l + +let variance min max m l = + let num = ref 0. in + let sum = ref 0. in + let rec variance_aux = function + | [] -> ! sum /. ! num + | (r, _, p) :: tail -> + if r >= min && r <= max then begin + num := ! num +. 1.; sum := ! sum +. (p -. m) *. (p -. m); + variance_aux tail end + else variance_aux tail + in sqrt (variance_aux l) + +let read_timing min max = + let l = List.sort compare1 (get_timing ()) in + let c = count min max l in + let m = mean min max l in + let v = variance min max m l in + print_timing l; + print_endline (string_of_int c ^ " " ^ string_of_float m ^ " " ^ + string_of_float v); + flush stdout + +let diff_timing () = + let rec diff_aux = function + | ((r1, q1, p1) :: t1, (r2, q2, p2) :: t2) when r1 = r2 -> + (r1, q1 -. q2, p1 -. p2) :: diff_aux (t1, t2) + | ([], []) -> print_newline (); [] + | (_ :: t1, _ :: t2) -> + print_string ("[" ^ string_of_int (1 + List.length t1) ^ "] "); + diff_aux (t1, t2) + | _ -> assert false + in + let l1 = get_timing () in + let l2 = get_timing () in + print_string (string_of_int (List.length l1) ^ " "); + print_string (string_of_int (List.length l2) ^ "\n"); + diff_aux (l1, l2) + +let comp_timing min max = + let l = List.sort compare1 (diff_timing ()) in + let c = count min max l in + let m = mean min max l in + let v = variance min max m l in + print_timing l; + print_endline (string_of_int c ^ " " ^ string_of_float m ^ " " ^ + string_of_float v); + flush stdout + +let print file = + let rec print_aux () = + let xobj = Xml.xnext() in + if fst xobj = XML_Done then () else + begin print_endline (string_of_xobject xobj); print_aux () end + in Xml.start file false; print_aux (); Xml.stop () + +let count_bytes s = + let mbytes = ref 0.0 in + let num = ref 0 in + let k = 1024.0 ** float (int_of_string s) in + let rec count_bytes_aux () = + try + let s = read_line () in + (* prerr_endline ("*" ^ s ^ "*"); *) + let j = String.rindex s ' ' in + let i = + try succ (String.rindex_from s (pred j) ' ') + with Not_found -> 0 in + (* prerr_endline ("*" ^ String.sub s i (j - i) ^ "*"); *) + let b = int_of_string (String.sub s i (j - i)) in + mbytes := ! mbytes +. float b /. k; incr num; + count_bytes_aux () + with End_of_file -> () + in + count_bytes_aux (); + prerr_endline (string_of_int ! num ^ " " ^ string_of_float ! mbytes) +*) + +let _ = + let usage = "Usage: hxp [-lx

] [-p ]" in + + let _l = " set the path to the index.txt file (default is .)" in + let _x = " set the path to the RDF export directory (default is EXPORT)" in + let _r = " read the XML files extracting the specified information + : con-info produce information about .con files in HTML format + html-names produce short names' list in HTML format + pg-names produce short names' list in Postgres input format + rdf-names produce RDF files with short names' metadata" + in + + Arg.parse [ + ("-l", Arg.String (fun dir -> lib := dir), _l); + ("-x", Arg.String (fun dir -> build := dir), _x); + ("-r", Arg.String read_xml, _r) + ] ignore usage + +(* +let parse_args () = + let rec parse = function + | [] -> () + | ("-l"|"-library") :: dir :: rem -> lib := dir; parse rem + | ("-x"|"-export") :: dir :: rem -> build := dir; parse rem + | ("-s"|"-stat") :: min :: max :: rem -> + read_timing (float_of_string min) (float_of_string max); parse rem + | ("-c"|"-comp") :: min :: max :: rem -> + comp_timing (float_of_string min) (float_of_string max); parse rem + | ("-i"|"-ins") :: s :: file :: rem -> test s file false; parse rem + | ("-t"|"-text") :: s :: rem -> read_xml s false; parse rem + | ("-z"|"-gzip") :: s :: rem -> read_xml s true; parse rem + | ("-b"|"-bytes") :: s :: rem -> count_bytes s; parse rem + | _ :: rem -> parse rem + in + parse (List.tl (Array.to_list Sys.argv)) +*) diff --git a/helm/hxp/hxpTypes.ml b/helm/hxp/hxpTypes.ml new file mode 100644 index 000000000..fb8cfeceb --- /dev/null +++ b/helm/hxp/hxpTypes.ml @@ -0,0 +1,57 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://www.cs.unibo.it/helm/. + *) + +(* AUTOR: Ferruccio Guidi + *) + +type xml_object = + | XML_Open of string + | XML_Attribute of string * string + | XML_Close of string + | XML_End + | XML_Done + +type xml_xobject = xml_object * int + +type timing = float * float * float + +let string_of_object = function + | XML_Open t -> "OPEN " ^ t + | XML_Close t -> "CLOSE " ^ t + | XML_Attribute (n, v) -> "ATTRIBUTE " ^ n ^ "=" ^ v + | XML_End -> "END" + | XML_Done -> "" + +let string_of_xobject (o, i) = + string_of_object o ^ " [" ^ string_of_int i ^ "]" + +let string_of_timing (r, t, p) = + string_of_float r ^ "," ^ string_of_float t ^ "s, " ^ string_of_float p + +let rec print_timing = function + | [] -> () + | head :: tail -> + print_endline (string_of_timing head); flush stdout; + print_timing tail diff --git a/helm/hxp/hxpXML.ml b/helm/hxp/hxpXML.ml new file mode 100644 index 000000000..7c4dc4cb4 --- /dev/null +++ b/helm/hxp/hxpXML.ml @@ -0,0 +1,91 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://www.cs.unibo.it/helm/. + *) + +(* AUTOR: Ferruccio Guidi + *) + +module T = HxpTypes + +module type Type = sig + + type handle + + val start : string -> bool -> handle + + val stop : handle -> unit + + val next : handle -> T.xml_object + + val xnext : handle -> handle * T.xml_xobject + + val scan_for : handle -> T.xml_xobject -> handle + +end + +module Make : Type = struct + + module L = HxpLexer + + module P = HxpParser + + type mode_t = Text of in_channel * Lexing.lexbuf + | GZip of Gzip.in_channel * Lexing.lexbuf + + type handle = {mode : mode_t; level : int} + + let start s filter = + let gread gch s n = Gzip.input gch s 0 n in + if filter then + let gch = Gzip.open_in (s ^ ".gz") in + let glb = Lexing.from_function (gread gch) in + {mode = GZip (gch, glb); level = 0} + else + let ch = open_in s in + let lb = Lexing.from_channel ch in + {mode = Text (ch, lb); level = 0} + + let stop = function + | {mode = GZip (gch, _)} -> Gzip.close_in gch + | {mode = Text (ch, _)} -> close_in ch + + let next h = + let lexbuf = match h.mode with + | GZip (_, glb) -> glb + | Text (_, lb) -> lb + in + P.xml L.xml_token lexbuf + + let xnext h = + let obj = next h in + match obj with + | T.XML_Open _ -> {h with level = succ h.level}, (obj, h.level) + | T.XML_Close _ -> {h with level = pred h.level}, (obj, pred h.level) + | _ -> h, (obj, h.level) + + let rec scan_for h xobj = + let h, curr_xobj = xnext h in + if curr_xobj = xobj then h else scan_for h xobj + +end