*.cm[iaox] *.cmxa
+test
+test.opt
xml.cmo: xml.cmi
xml.cmx: xml.cmi
+xmlPushParser.cmo: xmlPushParser.cmi
+xmlPushParser.cmx: xmlPushParser.cmi
PACKAGE = xml
-REQUIRES = zip
+REQUIRES = zip expat
PREDICATES =
-INTERFACE_FILES = xml.mli
+INTERFACE_FILES = \
+ xml.mli \
+ xmlPushParser.mli
IMPLEMENTATION_FILES = $(INTERFACE_FILES:%.mli=%.ml)
EXTRA_OBJECTS_TO_INSTALL =
EXTRA_OBJECTS_TO_CLEAN =
--- /dev/null
+
+(* Parsing test:
+ * - XmlPushParser version *)
+open Printf
+open XmlPushParser
+
+let print s = print_endline s; flush stdout
+
+let callbacks =
+ { default_callbacks with
+ start_element =
+ Some (fun tag attrs ->
+ let length = List.length attrs in
+ print (sprintf "opening %s [%s]"
+ tag (String.concat ";" (List.map fst attrs))));
+ end_element = Some (fun tag -> print ("closing " ^ tag));
+ character_data = Some (fun data -> print "character data ...");
+ }
+
+let xml_parser = create_parser callbacks
+
+let is_gzip f =
+ try
+ let len = String.length f in
+ String.sub f (len - 3) 3 = ".gz"
+ with Invalid_argument _ -> false
+
+let _ =
+ let xml_source =
+ if is_gzip Sys.argv.(1) then
+ `Gzip_file Sys.argv.(1)
+ else
+ `File Sys.argv.(1)
+ in
+ parse xml_parser xml_source
+
+(* Parsing test:
+ * - Pure expat version (without XmlPushParser mediation).
+ * Originally written only to test if XmlPushParser mediation caused overhead.
+ * That was not the case. *)
+
+(*let _ =*)
+(* let ic = open_in Sys.argv.(1) in*)
+(* let expat_parser = Expat.parser_create ~encoding:None in*)
+(* Expat.set_start_element_handler expat_parser*)
+(* (fun tag attrs ->*)
+(* let length = List.length attrs in*)
+(* print (sprintf "opening %s [%d attribute%s]"*)
+(* tag length (if length = 1 then "" else "s")));*)
+(* Expat.set_end_element_handler expat_parser*)
+(* (fun tag -> print ("closing " ^ tag));*)
+(* Expat.set_character_data_handler expat_parser*)
+(* (fun data -> print "character data ...");*)
+(* try*)
+(* while true do*)
+(* Expat.parse expat_parser (input_line ic ^ "\n")*)
+(* done*)
+(* with End_of_file -> Expat.final expat_parser*)
+
--- /dev/null
+(* Copyright (C) 2004-2005, HELM Team.
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+let gzip_bufsize = 10240
+
+type callbacks = {
+ start_element: (string -> (string * string) list -> unit) option;
+ end_element: (string -> unit) option;
+ character_data: (string -> unit) option;
+ processing_instruction: (string -> string -> unit) option;
+ comment: (string -> unit) option;
+}
+
+let default_callbacks = {
+ start_element = None;
+ end_element = None;
+ character_data = None;
+ processing_instruction = None;
+ comment = None;
+}
+
+type xml_source =
+ [ `Channel of in_channel
+ | `File of string
+ | `Gzip_channel of Gzip.in_channel
+ | `Gzip_file of string
+ | `String of string
+ ]
+
+type position = int * int
+
+type xml_parser = Expat.expat_parser
+
+let create_parser callbacks =
+ let expat_parser = Expat.parser_create ~encoding:None in
+ (match callbacks.start_element with
+ | Some f -> Expat.set_start_element_handler expat_parser f
+ | _ -> ());
+ (match callbacks.end_element with
+ | Some f -> Expat.set_end_element_handler expat_parser f
+ | _ -> ());
+ (match callbacks.character_data with
+ | Some f -> Expat.set_character_data_handler expat_parser f
+ | _ -> ());
+ (match callbacks.processing_instruction with
+ | Some f -> Expat.set_processing_instruction_handler expat_parser f
+ | _ -> ());
+ (match callbacks.comment with
+ | Some f -> Expat.set_comment_handler expat_parser f
+ | _ -> ());
+ expat_parser
+
+let final = Expat.final
+
+let get_position expat_parser =
+ (Expat.get_current_line_number expat_parser,
+ Expat.get_current_column_number expat_parser)
+
+let parse expat_parser =
+ let parse_fun = Expat.parse expat_parser in
+ let rec aux = function
+ | `Channel ic ->
+ (try
+ while true do parse_fun (input_line ic ^ "\n") done
+ with End_of_file -> final expat_parser)
+ | `File fname ->
+ let ic = open_in fname in
+ aux (`Channel ic);
+ close_in ic
+ | `Gzip_channel ic ->
+ let buf = String.create gzip_bufsize in
+ (try
+ while true do
+ let bytes = Gzip.input ic buf 0 gzip_bufsize in
+ if bytes = 0 then raise End_of_file;
+ parse_fun (String.sub buf 0 bytes)
+ done
+ with End_of_file -> final expat_parser)
+ | `Gzip_file fname ->
+ let ic = Gzip.open_in fname in
+ aux (`Gzip_channel ic);
+ Gzip.close_in ic
+ | `String s -> parse_fun s
+ in
+ aux
+
--- /dev/null
+(* Copyright (C) 2004-2005, HELM Team.
+ *
+ * This file is part of HELM, an Hypertextual, Electronic
+ * Library of Mathematics, developed at the Computer Science
+ * Department, University of Bologna, Italy.
+ *
+ * HELM is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * HELM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HELM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ * For details, see the HELM World-Wide-Web page,
+ * http://helm.cs.unibo.it/
+ *)
+
+(** {2 XLM push parser generic interface}
+ * Do not depend on CIC *)
+
+ (** callbacks needed to instantiate a parser *)
+type callbacks = {
+ start_element:
+ (string -> (string * string) list -> unit) option; (* tag, attr list *)
+ end_element: (string -> unit) option; (* tag *)
+ character_data: (string -> unit) option; (* data *)
+ processing_instruction:
+ (string -> string -> unit) option; (* target, value *)
+ comment: (string -> unit) option; (* value *)
+}
+
+ (** do nothing callbacks (all set to None) *)
+val default_callbacks: callbacks
+
+ (** source from which parse an XML file *)
+type xml_source =
+ [ `Channel of in_channel
+ | `File of string
+ | `Gzip_channel of Gzip.in_channel
+ | `Gzip_file of string
+ | `String of string
+ ]
+
+ (** source position in a XML source.
+ * A position is a pair <line, column> *)
+type position = int * int
+
+type xml_parser
+
+ (** Create a push parser which invokes the given callbacks *)
+val create_parser: callbacks -> xml_parser
+
+ (** Parse XML data from a given source with a given parser *)
+val parse: xml_parser -> xml_source -> unit
+
+ (** Inform the farser that parsing is completed, needed only when source is
+ * `String, for other sources it is automatically invoked when the end of file
+ * is reached *)
+val final: xml_parser -> unit
+
+ (** @return current <line, column> pair *)
+val get_position: xml_parser -> position
+