From 3ee9e411a482866320ed10318070212b1e994c53 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 10 May 2005 10:10:38 +0000 Subject: [PATCH] moved here xmlPushParser.ml and corresponding test --- helm/ocaml/xml/.cvsignore | 2 + helm/ocaml/xml/.depend | 2 + helm/ocaml/xml/Makefile | 6 +- helm/ocaml/xml/test.ml | 59 +++++++++++++++++ helm/ocaml/xml/xmlPushParser.ml | 108 +++++++++++++++++++++++++++++++ helm/ocaml/xml/xmlPushParser.mli | 71 ++++++++++++++++++++ 6 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 helm/ocaml/xml/test.ml create mode 100644 helm/ocaml/xml/xmlPushParser.ml create mode 100644 helm/ocaml/xml/xmlPushParser.mli diff --git a/helm/ocaml/xml/.cvsignore b/helm/ocaml/xml/.cvsignore index 6b3eba302..ee126863f 100644 --- a/helm/ocaml/xml/.cvsignore +++ b/helm/ocaml/xml/.cvsignore @@ -1 +1,3 @@ *.cm[iaox] *.cmxa +test +test.opt diff --git a/helm/ocaml/xml/.depend b/helm/ocaml/xml/.depend index bc6941bdf..5ef59bdc9 100644 --- a/helm/ocaml/xml/.depend +++ b/helm/ocaml/xml/.depend @@ -1,2 +1,4 @@ xml.cmo: xml.cmi xml.cmx: xml.cmi +xmlPushParser.cmo: xmlPushParser.cmi +xmlPushParser.cmx: xmlPushParser.cmi diff --git a/helm/ocaml/xml/Makefile b/helm/ocaml/xml/Makefile index 41a7edaa4..7ecb75457 100644 --- a/helm/ocaml/xml/Makefile +++ b/helm/ocaml/xml/Makefile @@ -1,8 +1,10 @@ PACKAGE = xml -REQUIRES = zip +REQUIRES = zip expat PREDICATES = -INTERFACE_FILES = xml.mli +INTERFACE_FILES = \ + xml.mli \ + xmlPushParser.mli IMPLEMENTATION_FILES = $(INTERFACE_FILES:%.mli=%.ml) EXTRA_OBJECTS_TO_INSTALL = EXTRA_OBJECTS_TO_CLEAN = diff --git a/helm/ocaml/xml/test.ml b/helm/ocaml/xml/test.ml new file mode 100644 index 000000000..516ba77c6 --- /dev/null +++ b/helm/ocaml/xml/test.ml @@ -0,0 +1,59 @@ + +(* Parsing test: + * - XmlPushParser version *) +open Printf +open XmlPushParser + +let print s = print_endline s; flush stdout + +let callbacks = + { default_callbacks with + start_element = + Some (fun tag attrs -> + let length = List.length attrs in + print (sprintf "opening %s [%s]" + tag (String.concat ";" (List.map fst attrs)))); + end_element = Some (fun tag -> print ("closing " ^ tag)); + character_data = Some (fun data -> print "character data ..."); + } + +let xml_parser = create_parser callbacks + +let is_gzip f = + try + let len = String.length f in + String.sub f (len - 3) 3 = ".gz" + with Invalid_argument _ -> false + +let _ = + let xml_source = + if is_gzip Sys.argv.(1) then + `Gzip_file Sys.argv.(1) + else + `File Sys.argv.(1) + in + parse xml_parser xml_source + +(* Parsing test: + * - Pure expat version (without XmlPushParser mediation). + * Originally written only to test if XmlPushParser mediation caused overhead. + * That was not the case. *) + +(*let _ =*) +(* let ic = open_in Sys.argv.(1) in*) +(* let expat_parser = Expat.parser_create ~encoding:None in*) +(* Expat.set_start_element_handler expat_parser*) +(* (fun tag attrs ->*) +(* let length = List.length attrs in*) +(* print (sprintf "opening %s [%d attribute%s]"*) +(* tag length (if length = 1 then "" else "s")));*) +(* Expat.set_end_element_handler expat_parser*) +(* (fun tag -> print ("closing " ^ tag));*) +(* Expat.set_character_data_handler expat_parser*) +(* (fun data -> print "character data ...");*) +(* try*) +(* while true do*) +(* Expat.parse expat_parser (input_line ic ^ "\n")*) +(* done*) +(* with End_of_file -> Expat.final expat_parser*) + diff --git a/helm/ocaml/xml/xmlPushParser.ml b/helm/ocaml/xml/xmlPushParser.ml new file mode 100644 index 000000000..ae93f4e6a --- /dev/null +++ b/helm/ocaml/xml/xmlPushParser.ml @@ -0,0 +1,108 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +let gzip_bufsize = 10240 + +type callbacks = { + start_element: (string -> (string * string) list -> unit) option; + end_element: (string -> unit) option; + character_data: (string -> unit) option; + processing_instruction: (string -> string -> unit) option; + comment: (string -> unit) option; +} + +let default_callbacks = { + start_element = None; + end_element = None; + character_data = None; + processing_instruction = None; + comment = None; +} + +type xml_source = + [ `Channel of in_channel + | `File of string + | `Gzip_channel of Gzip.in_channel + | `Gzip_file of string + | `String of string + ] + +type position = int * int + +type xml_parser = Expat.expat_parser + +let create_parser callbacks = + let expat_parser = Expat.parser_create ~encoding:None in + (match callbacks.start_element with + | Some f -> Expat.set_start_element_handler expat_parser f + | _ -> ()); + (match callbacks.end_element with + | Some f -> Expat.set_end_element_handler expat_parser f + | _ -> ()); + (match callbacks.character_data with + | Some f -> Expat.set_character_data_handler expat_parser f + | _ -> ()); + (match callbacks.processing_instruction with + | Some f -> Expat.set_processing_instruction_handler expat_parser f + | _ -> ()); + (match callbacks.comment with + | Some f -> Expat.set_comment_handler expat_parser f + | _ -> ()); + expat_parser + +let final = Expat.final + +let get_position expat_parser = + (Expat.get_current_line_number expat_parser, + Expat.get_current_column_number expat_parser) + +let parse expat_parser = + let parse_fun = Expat.parse expat_parser in + let rec aux = function + | `Channel ic -> + (try + while true do parse_fun (input_line ic ^ "\n") done + with End_of_file -> final expat_parser) + | `File fname -> + let ic = open_in fname in + aux (`Channel ic); + close_in ic + | `Gzip_channel ic -> + let buf = String.create gzip_bufsize in + (try + while true do + let bytes = Gzip.input ic buf 0 gzip_bufsize in + if bytes = 0 then raise End_of_file; + parse_fun (String.sub buf 0 bytes) + done + with End_of_file -> final expat_parser) + | `Gzip_file fname -> + let ic = Gzip.open_in fname in + aux (`Gzip_channel ic); + Gzip.close_in ic + | `String s -> parse_fun s + in + aux + diff --git a/helm/ocaml/xml/xmlPushParser.mli b/helm/ocaml/xml/xmlPushParser.mli new file mode 100644 index 000000000..0bbd2203d --- /dev/null +++ b/helm/ocaml/xml/xmlPushParser.mli @@ -0,0 +1,71 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +(** {2 XLM push parser generic interface} + * Do not depend on CIC *) + + (** callbacks needed to instantiate a parser *) +type callbacks = { + start_element: + (string -> (string * string) list -> unit) option; (* tag, attr list *) + end_element: (string -> unit) option; (* tag *) + character_data: (string -> unit) option; (* data *) + processing_instruction: + (string -> string -> unit) option; (* target, value *) + comment: (string -> unit) option; (* value *) +} + + (** do nothing callbacks (all set to None) *) +val default_callbacks: callbacks + + (** source from which parse an XML file *) +type xml_source = + [ `Channel of in_channel + | `File of string + | `Gzip_channel of Gzip.in_channel + | `Gzip_file of string + | `String of string + ] + + (** source position in a XML source. + * A position is a pair *) +type position = int * int + +type xml_parser + + (** Create a push parser which invokes the given callbacks *) +val create_parser: callbacks -> xml_parser + + (** Parse XML data from a given source with a given parser *) +val parse: xml_parser -> xml_source -> unit + + (** Inform the farser that parsing is completed, needed only when source is + * `String, for other sources it is automatically invoked when the end of file + * is reached *) +val final: xml_parser -> unit + + (** @return current pair *) +val get_position: xml_parser -> position + -- 2.39.2