From 58bc0455c51116f049a7135dfed6e235c271f0d2 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Thu, 5 May 2005 15:54:14 +0000 Subject: [PATCH] - implemented CicPushParser which parser CIC objects using Expat (currently, old PXP parser have been renamed to CicPxpParser, new one has been named CicPushParser and CicParser can includes one of the two. It includes CicPushParser per default) - added mandatory uri parameter to main parsing functions, it is needed for creation of universes when Type sorts are encountered --- helm/ocaml/METAS/meta.helm-cic.src | 2 +- helm/ocaml/cic/.depend | 12 + helm/ocaml/cic/Makefile | 17 +- helm/ocaml/cic/cicParser.ml | 74 +--- helm/ocaml/cic/cicParser.mli | 39 +- helm/ocaml/cic/cicPushParser.ml | 677 +++++++++++++++++++++++++++++ helm/ocaml/cic/cicPushParser.mli | 48 ++ helm/ocaml/cic/cicPxpParser.ml | 103 +++++ helm/ocaml/cic/cicPxpParser.mli | 61 +++ helm/ocaml/cic/test.ml | 126 ++++++ helm/ocaml/cic/xmlPushParser.ml | 91 ++++ helm/ocaml/cic/xmlPushParser.mli | 69 +++ 12 files changed, 1213 insertions(+), 106 deletions(-) create mode 100644 helm/ocaml/cic/cicPushParser.ml create mode 100644 helm/ocaml/cic/cicPushParser.mli create mode 100644 helm/ocaml/cic/cicPxpParser.ml create mode 100644 helm/ocaml/cic/cicPxpParser.mli create mode 100644 helm/ocaml/cic/test.ml create mode 100644 helm/ocaml/cic/xmlPushParser.ml create mode 100644 helm/ocaml/cic/xmlPushParser.mli diff --git a/helm/ocaml/METAS/meta.helm-cic.src b/helm/ocaml/METAS/meta.helm-cic.src index d096cab7f..946dfeae2 100644 --- a/helm/ocaml/METAS/meta.helm-cic.src +++ b/helm/ocaml/METAS/meta.helm-cic.src @@ -1,4 +1,4 @@ -requires="helm-urimanager helm-pxp helm-xml" +requires="helm-urimanager helm-pxp helm-xml expat" version="0.0.1" archive(byte)="cic.cma" archive(native)="cic.cmxa" diff --git a/helm/ocaml/cic/.depend b/helm/ocaml/cic/.depend index 8d735abd3..78af7a0f1 100644 --- a/helm/ocaml/cic/.depend +++ b/helm/ocaml/cic/.depend @@ -1,11 +1,15 @@ deannotate.cmi: cic.cmo cicParser3.cmi: cic.cmo cicParser2.cmi: cicParser3.cmi cic.cmo +cicPxpParser.cmi: cic.cmo +cicPushParser.cmi: cicParser.cmi cicParser.cmi: cic.cmo cicUtil.cmi: cic.cmo helmLibraryObjects.cmi: cic.cmo cic.cmo: cicUniv.cmi cic.cmx: cicUniv.cmx +xmlPushParser.cmo: xmlPushParser.cmi +xmlPushParser.cmx: xmlPushParser.cmi cicUniv.cmo: cicUniv.cmi cicUniv.cmx: cicUniv.cmi deannotate.cmo: cic.cmo deannotate.cmi @@ -14,6 +18,14 @@ cicParser3.cmo: cicUniv.cmi cic.cmo cicParser3.cmi cicParser3.cmx: cicUniv.cmx cic.cmx cicParser3.cmi cicParser2.cmo: cicParser3.cmi cic.cmo cicParser2.cmi cicParser2.cmx: cicParser3.cmx cic.cmx cicParser2.cmi +cicPxpParser.cmo: deannotate.cmi cicParser3.cmi cicParser2.cmi \ + cicPxpParser.cmi +cicPxpParser.cmx: deannotate.cmx cicParser3.cmx cicParser2.cmx \ + cicPxpParser.cmi +cicPushParser.cmo: xmlPushParser.cmi deannotate.cmi cicUniv.cmi cic.cmo \ + cicPushParser.cmi +cicPushParser.cmx: xmlPushParser.cmx deannotate.cmx cicUniv.cmx cic.cmx \ + cicPushParser.cmi cicParser.cmo: deannotate.cmi cicParser3.cmi cicParser2.cmi cicParser.cmi cicParser.cmx: deannotate.cmx cicParser3.cmx cicParser2.cmx cicParser.cmi cicUtil.cmo: cicUniv.cmi cic.cmo cicUtil.cmi diff --git a/helm/ocaml/cic/Makefile b/helm/ocaml/cic/Makefile index 5d3c19c3c..a7f4b568b 100644 --- a/helm/ocaml/cic/Makefile +++ b/helm/ocaml/cic/Makefile @@ -1,14 +1,17 @@ PACKAGE = cic -REQUIRES = helm-urimanager helm-pxp helm-xml +REQUIRES = helm-urimanager helm-pxp helm-xml expat PREDICATES = INTERFACE_FILES = \ - cicUniv.mli \ - deannotate.mli \ - cicParser3.mli \ - cicParser2.mli \ - cicParser.mli \ - cicUtil.mli \ + xmlPushParser.mli \ + cicUniv.mli \ + deannotate.mli \ + cicParser3.mli \ + cicParser2.mli \ + cicPxpParser.mli \ + cicPushParser.mli \ + cicParser.mli \ + cicUtil.mli \ helmLibraryObjects.mli IMPLEMENTATION_FILES = \ cic.ml $(INTERFACE_FILES:%.mli=%.ml) diff --git a/helm/ocaml/cic/cicParser.ml b/helm/ocaml/cic/cicParser.ml index 64ee58427..c46a20a98 100644 --- a/helm/ocaml/cic/cicParser.ml +++ b/helm/ocaml/cic/cicParser.ml @@ -1,4 +1,4 @@ -(* Copyright (C) 2000, HELM Team. +(* Copyright (C) 2000-2005, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science @@ -23,75 +23,5 @@ * http://cs.unibo.it/helm/. *) -(******************************************************************************) -(* *) -(* PROJECT HELM *) -(* *) -(* Claudio Sacerdoti Coen *) -(* 24/01/2000 *) -(* *) -(* This is the main (top level) module of a parser for cic objects from xml *) -(* files to the internal representation. It uses the modules cicParser2 *) -(* (objects level) and cicParser3 (terms level) *) -(* *) -(******************************************************************************) - -exception Getter_failure of string * string -exception Parser_failure of string - - (** tries to recover from a parse error caused by the parsing of a getter - * error message (e.g. Key_not_found exception). Unfortunately we have to - * re-parse xml document to extract exception data *) -let try_recover exn filename = - let rc = ref None in - (try - let entity_manager = - Pxp_ev_parser.create_entity_manager ~is_document:true - PxpHelmConf.pxp_config (Pxp_types.from_file filename) - in - let pull_parser = - Pxp_ev_parser.create_pull_parser PxpHelmConf.pxp_config - (`Entry_document []) entity_manager - in - let rec find_exn p = - match p () with - | None -> () - | Some (Pxp_types.E_start_tag ("html", attrs, _, _)) -> - let exn = List.assoc "helm:exception" attrs in - let arg = - try List.assoc "helm:exception_arg" attrs with Not_found -> "" - in - rc := Some (Getter_failure (exn, arg)) - | _ -> find_exn p - in - find_exn pull_parser - with _ -> raise (Parser_failure (Printexc.to_string exn))); - match !rc with - | None -> raise (Parser_failure (Printexc.to_string exn)) - | Some exn -> raise exn - -let parse_document filename = - try - Pxp_tree_parser.parse_document_entity PxpHelmConf.pxp_config - (Pxp_types.from_file ~alt:[PxpUrlResolver.url_resolver] filename) - CicParser3.domspec - with exn -> - raise (try_recover exn filename) - -(* given the filename of an xml file of a cic object it returns its internal *) -(* representation. *) -let annobj_of_xml filename filenamebody = - let root, rootbody = - let doc = parse_document filename in - let docroot = doc#root in - match filenamebody with - None -> docroot,None - | Some filename -> - let docbody = parse_document filename in - docroot,Some docbody#root - in - CicParser2.get_term root rootbody - -let obj_of_xml filename filenamebody = - Deannotate.deannotate_obj (annobj_of_xml filename filenamebody) +include CicPushParser.CicParser diff --git a/helm/ocaml/cic/cicParser.mli b/helm/ocaml/cic/cicParser.mli index 048df36a3..33c5b4b19 100644 --- a/helm/ocaml/cic/cicParser.mli +++ b/helm/ocaml/cic/cicParser.mli @@ -1,4 +1,4 @@ -(* Copyright (C) 2000, HELM Team. +(* Copyright (C) 2000-2005, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science @@ -23,35 +23,22 @@ * http://cs.unibo.it/helm/. *) -(******************************************************************************) -(* *) -(* PROJECT HELM *) -(* *) -(* Claudio Sacerdoti Coen *) -(* 22/03/2000 *) -(* *) -(* This is the main (top level) module of a parser for cic objects from xml *) -(* files to the internal representation. It uses the modules cicParser2 *) -(* (objects level) and cicParser3 (terms level) *) -(* *) -(******************************************************************************) - (** raised for exception received by the getter (i.e. embedded in the source - * XML document). Arguments are values of "helm:exception" and - * "helm:exception_arg" attributes *) + * XML document). Arguments are values of "helm:exception" and + * "helm:exception_arg" attributes *) exception Getter_failure of string * string (** generic parser failure *) exception Parser_failure of string -(* given the filename of an xml file of a cic object, it returns *) -(* its internal annotated representation. In the case of constants (whose *) -(* type is splitted from the body), a second xml file (for the body) must be *) -(* provided. *) -val annobj_of_xml : string -> string option -> Cic.annobj + (* given the filename of an xml file of a cic object, it returns + * its internal annotated representation. In the case of constants (whose + * type is splitted from the body), a second xml file (for the body) must be + * provided. *) +val annobj_of_xml: UriManager.uri -> string -> string option -> Cic.annobj + + (* given the filename of an xml file of a cic object, it returns its internal + * logical representation. In the case of constants (whose type is splitted + * from the body), a second xml file (for the body) must be provided. *) +val obj_of_xml : UriManager.uri -> string -> string option -> Cic.obj -(* given the filename of an xml file of a cic object, it returns *) -(* its internal logical representation. In the case of constants (whose *) -(* type is splitted from the body), a second xml file (for the body) must be *) -(* provided. *) -val obj_of_xml : string -> string option -> Cic.obj diff --git a/helm/ocaml/cic/cicPushParser.ml b/helm/ocaml/cic/cicPushParser.ml new file mode 100644 index 000000000..31ded7ef2 --- /dev/null +++ b/helm/ocaml/cic/cicPushParser.ml @@ -0,0 +1,677 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +let debug_print = prerr_endline + +open Printf + +(* ZACK TODO element from the DTD still to be handled: + + + + + + + + +*) + +module CicParser = +struct + +exception Getter_failure of string * string +exception Parser_failure of string + +type stack_entry = + | Arg of string * Cic.annterm (* relative uri, term *) + (* constants' body and types resides in differne files, thus we can't simple + * keep constants in Cic_obj stack entries *) + | Cic_attributes of Cic.attribute list + | Cic_constant_body of string * string * UriManager.uri list * Cic.annterm + * Cic.attribute list + (* id, for, params, body, object attributes *) + | Cic_constant_type of string * string * UriManager.uri list * Cic.annterm + * Cic.attribute list + (* id, name, params, type, object attributes *) + | Cic_term of Cic.annterm (* term *) + | Cic_obj of Cic.annobj (* object *) + | Cofix_fun of Cic.id * string * Cic.annterm * Cic.annterm + (* id, name, type, body *) + | Constructor of string * Cic.annterm (* name, type *) + | Decl of Cic.id * Cic.name * Cic.annterm (* id, binder, source *) + | Def of Cic.id * Cic.name * Cic.annterm (* id, binder, source *) + | Fix_fun of Cic.id * string * int * Cic.annterm * Cic.annterm + (* id, name, ind. index, type, body *) + | Inductive_type of string * string * bool * Cic.annterm * + (string * Cic.annterm) list (* id, name, inductive, arity, constructors *) + | Meta_subst of Cic.annterm option + | Tag of string * (string * string) list (* tag name, attributes *) + (* ZACK TODO add file position to tag stack entry so that when attribute + * errors occur, the position of their _start_tag_ could be printed + * instead of the current position (usually the end tag) *) + +type ctxt = { + mutable stack: stack_entry list; + mutable xml_parser: XmlPushParser.xml_parser option; + mutable filename: string; + uri: UriManager.uri; +} + +let string_of_stack ctxt = + "[" ^ (String.concat "; " + (List.map + (function + | Arg (reluri, _) -> sprintf "Arg %s" reluri + | Cic_attributes _ -> "Cic_attributes" + | Cic_constant_body (id, name, _, _, _) -> + sprintf "Cic_constant_body %s (id=%s)" name id + | Cic_constant_type (id, name, _, _, _) -> + sprintf "Cic_constant_type %s (id=%s)" name id + | Cic_term _ -> "Cic_term" + | Cic_obj _ -> "Cic_obj" + | Constructor (name, _) -> "Constructor " ^ name + | Cofix_fun (id, _, _, _) -> sprintf "Cofix_fun (id=%s)" id + | Decl (id, _, _) -> sprintf "Decl (id=%s)" id + | Def (id, _, _) -> sprintf "Def (id=%s)" id + | Fix_fun (id, _, _, _, _) -> sprintf "Fix_fun (id=%s)" id + | Inductive_type (id, name, _, _, _) -> + sprintf "Inductive_type %s (id=%s)" name id + | Meta_subst _ -> "Meta_subst" + | Tag (tag, _) -> "Tag " ^ tag) + ctxt.stack)) ^ "]" + +let compare_attrs (a1, v1) (a2, v2) = Pervasives.compare a1 a2 +let sort_attrs = List.sort compare_attrs + +let new_parser_context uri = { + stack = []; + xml_parser = None; + filename = "-"; + uri = uri; +} + +let get_parser ctxt = + match ctxt.xml_parser with + | Some p -> p + | None -> assert false + +(** {2 Error handling} *) + +let parse_error ctxt msg = + let (line, col) = XmlPushParser.get_position (get_parser ctxt) in + raise (Parser_failure (sprintf "[%s: line %d, column %d] %s" + ctxt.filename line col msg)) + +let attribute_error ctxt tag = + parse_error ctxt ("wrong attribute set for " ^ tag) + +(** {2 Parsing context management} *) + +let pop ctxt = +(* debug_print "pop";*) + match ctxt.stack with + | hd :: tl -> (ctxt.stack <- tl) + | _ -> assert false + +let push ctxt v = +(* debug_print "push";*) + ctxt.stack <- v :: ctxt.stack + +let set_top ctxt v = +(* debug_print "set_top";*) + match ctxt.stack with + | _ :: tl -> (ctxt.stack <- v :: tl) + | _ -> assert false + + (** pop the last tag from the open tags stack returning a pair *) +let pop_tag ctxt = + match ctxt.stack with + | Tag (tag, attrs) :: tl -> + ctxt.stack <- tl; + (tag, attrs) + | _ -> parse_error ctxt "unexpected extra content" + + (** pop the last tag from the open tags stack returning its attributes. + * Attributes are returned as a list of pair _sorted_ by + * attribute name *) +let pop_tag_attrs ctxt = sort_attrs (snd (pop_tag ctxt)) + +let pop_cics ctxt = + let rec aux acc stack = + match stack with + | Cic_term t :: tl -> aux (t :: acc) tl + | tl -> acc, tl + in + let values, new_stack = aux [] ctxt.stack in + ctxt.stack <- new_stack; + values + +let pop_meta_substs ctxt = + let rec aux acc stack = + match stack with + | Meta_subst t :: tl -> aux (t :: acc) tl + | tl -> acc, tl + in + let values, new_stack = aux [] ctxt.stack in + ctxt.stack <- new_stack; + values + +let pop_fix_funs ctxt = + let rec aux acc stack = + match stack with + | Fix_fun (id, name, index, typ, body) :: tl -> + aux ((id, name, index, typ, body) :: acc) tl + | tl -> acc, tl + in + let values, new_stack = aux [] ctxt.stack in + ctxt.stack <- new_stack; + values + +let pop_cofix_funs ctxt = + let rec aux acc stack = + match stack with + | Cofix_fun (id, name, typ, body) :: tl -> + aux ((id, name, typ, body) :: acc) tl + | tl -> acc, tl + in + let values, new_stack = aux [] ctxt.stack in + ctxt.stack <- new_stack; + values + +let pop_constructors ctxt = + let rec aux acc stack = + match stack with + | Constructor (name, t) :: tl -> aux ((name, t) :: acc) tl + | tl -> acc, tl + in + let values, new_stack = aux [] ctxt.stack in + ctxt.stack <- new_stack; + values + +let pop_inductive_types ctxt = + let rec aux acc stack = + match stack with + | Inductive_type (id, name, ind, arity, ctors) :: tl -> + aux ((id, name, ind, arity, ctors) :: acc) tl + | tl -> acc, tl + in + let values, new_stack = aux [] ctxt.stack in + if values = [] then + parse_error ctxt "no \"InductiveType\" element found"; + ctxt.stack <- new_stack; + values + + (** travels the stack (without popping) for the first term subject of explicit + * named substitution and return its URI *) +let find_base_uri ctxt = + let rec aux = function + | Cic_term (Cic.AConst (_, uri, _)) :: _ + | Cic_term (Cic.AMutInd (_, uri, _, _)) :: _ + | Cic_term (Cic.AMutConstruct (_, uri, _, _, _)) :: _ + | Cic_term (Cic.AVar (_, uri, _)) :: _ -> + uri + | Arg _ :: tl -> aux tl + | _ -> parse_error ctxt "no \"arg\" element found" + in + UriManager.buri_of_uri (aux ctxt.stack) + + (** backwardly eats the stack building an explicit named substitution from Arg + * stack entries *) +let pop_subst ctxt base_uri = + let rec aux acc stack = + match stack with + | Arg (rel_uri, term) :: tl -> + let uri = UriManager.uri_of_string (base_uri ^ "/" ^ rel_uri) in + aux ((uri, term) :: acc) tl + | tl -> acc, tl + in + let subst, new_stack = aux [] ctxt.stack in + if subst = [] then + parse_error ctxt "no \"arg\" element found"; + ctxt.stack <- new_stack; + subst + +let pop_cic ctxt = + match ctxt.stack with + | Cic_term t :: tl -> + ctxt.stack <- tl; + t + | _ -> parse_error ctxt "no cic term found" + +let pop_obj_attributes ctxt = + match ctxt.stack with + | Cic_attributes attributes :: tl -> + ctxt.stack <- tl; + attributes + | _ -> [] + +(** {2 Auxiliary functions} *) + +let uri_of_string = UriManager.uri_of_string + +let uri_list_of_string = + let space_RE = Str.regexp " " in + fun s -> + List.map uri_of_string (Str.split space_RE s) + +let sort_of_string ctxt = function + | "Prop" -> Cic.Prop + | "Set" -> Cic.Set + | "Type" -> +(* CicUniv.restart_numbering (); |+ useful only to test parser +| *) + Cic.Type (CicUniv.fresh ~uri:ctxt.uri ()) + | _ -> parse_error ctxt "sort expected" + +let patch_subst ctxt subst = function + | Cic.AConst (id, uri, _) -> Cic.AConst (id, uri, subst) + | Cic.AMutInd (id, uri, typeno, _) -> + Cic.AMutInd (id, uri, typeno, subst) + | Cic.AMutConstruct (id, uri, typeno, consno, _) -> + Cic.AMutConstruct (id, uri, typeno, consno, subst) + | Cic.AVar (id, uri, _) -> Cic.AVar (id, uri, subst) + | _ -> + parse_error ctxt + ("only \"CONST\", \"VAR\", \"MUTIND\", and \"MUTCONSTRUCT\" can be" ^ + " instantiated") + + (** backwardly eats the stack seeking for the first open tag carrying + * "helm:exception" attributes. If found return Some of a pair containing + * exception name and argument. Return None otherwise *) +let find_helm_exception ctxt = + let rec aux = function + | [] -> None + | Tag (_, attrs) :: tl -> + (try + let exn = List.assoc "helm:exception" attrs in + let arg = + try List.assoc "helm:exception_arg" attrs with Not_found -> "" + in + Some (exn, arg) + with Not_found -> aux tl) + | _ :: tl -> aux tl + in + aux ctxt.stack + +(** {2 Push parser callbacks} + * each callback needs to be instantiated to a parsing context *) + +let start_element ctxt tag attrs = +(* debug_print (sprintf "<%s%s>" tag (match attrs with | [] -> "" | _ -> " " ^ String.concat " " (List.map (fun (a,v) -> sprintf "%s=\"%s\"" a v) attrs)));*) + push ctxt (Tag (tag, attrs)) + +let end_element ctxt tag = +(* debug_print (sprintf "" tag);*) +(* debug_print (string_of_stack ctxt);*) + let attribute_error () = attribute_error ctxt tag in + let parse_error = parse_error ctxt in + let sort_of_string = sort_of_string ctxt in + match tag with + | "REL" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["binder", binder; "id", id; "idref", idref; "sort", _; + "value", value] -> + Cic.ARel (id, idref, int_of_string value, binder) + | _ -> attribute_error ())) + | "VAR" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "sort", _; "uri", uri] -> + Cic.AVar (id, uri_of_string uri, []) + | _ -> attribute_error ())) + | "CONST" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "sort", _; "uri", uri] -> + Cic.AConst (id, uri_of_string uri, []) + | _ -> attribute_error ())) + | "SORT" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "value", sort] -> Cic.ASort (id, sort_of_string sort) + | _ -> attribute_error ())) + | "APPLY" -> + let args = pop_cics ctxt in + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "sort", _] -> Cic.AAppl (id, args) + | _ -> attribute_error ())) + | "decl" -> + let source = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["binder", binder; "id", id; "type", _] -> + Decl (id, Cic.Name binder, source) + | ["id", id; "type", _] -> Decl (id, Cic.Anonymous, source) + | _ -> attribute_error ()) + | "def" -> (* same as "decl" above *) + let source = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["binder", binder; "id", id; "sort", _] -> + Def (id, Cic.Name binder, source) + | ["id", id; "sort", _] -> Def (id, Cic.Anonymous, source) + | _ -> attribute_error ()) + | "arity" (* transparent elements (i.e. which contain a CIC) *) + | "body" + | "inductiveTerm" + | "pattern" + | "patternsType" + | "target" + | "term" + | "type" -> + let term = pop_cic ctxt in + pop ctxt; (* pops start tag matching current end tag (e.g. ) *) + push ctxt (Cic_term term) + | "substitution" -> (* optional transparent elements (i.e. which _may_ + * contain a CIC) *) + set_top ctxt (* replace *) + (match ctxt.stack with + | Cic_term term :: tl -> + ctxt.stack <- tl; + (Meta_subst (Some term)) + | _ -> Meta_subst None) + | "PROD" -> + let target = pop_cic ctxt in + let rec add_decl target = function + | Decl (id, binder, source) :: tl -> + add_decl (Cic.AProd (id, binder, source, target)) tl + | tl -> + ctxt.stack <- tl; + target + in + let term = add_decl target ctxt.stack in + (match pop_tag_attrs ctxt with + | ["type", _] -> () + | _ -> attribute_error ()); + push ctxt (Cic_term term) + | "LAMBDA" -> + let target = pop_cic ctxt in + let rec add_decl target = function + | Decl (id, binder, source) :: tl -> + add_decl (Cic.ALambda (id, binder, source, target)) tl + | tl -> + ctxt.stack <- tl; + target + in + let term = add_decl target ctxt.stack in + (match pop_tag_attrs ctxt with + | ["sort", _] -> () + | _ -> attribute_error ()); + push ctxt (Cic_term term) + | "LETIN" -> + let target = pop_cic ctxt in + let rec add_def target = function + | Def (id, binder, source) :: tl -> + add_def (Cic.ALetIn (id, binder, source, target)) tl + | tl -> + ctxt.stack <- tl; + target + in + let term = add_def target ctxt.stack in + (match pop_tag_attrs ctxt with + | ["sort", _] -> () + | _ -> attribute_error ()); + push ctxt (Cic_term term) + | "CAST" -> + let typ = pop_cic ctxt in + let term = pop_cic ctxt in + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "sort", _] -> Cic.ACast (id, term, typ) + | _ -> attribute_error ())); + | "IMPLICIT" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id] -> Cic.AImplicit (id, None) + | ["annotation", annotation; "id", id] -> + let implicit_annotation = + match annotation with + | "closed" -> `Closed + | "hole" -> `Hole + | "type" -> `Type + | _ -> parse_error "invalid value for \"annotation\" attribute" + in + Cic.AImplicit (id, Some implicit_annotation) + | _ -> attribute_error ())) + | "META" -> + let meta_substs = pop_meta_substs ctxt in + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "no", no; "sort", _] -> + Cic.AMeta (id, int_of_string no, meta_substs) + | _ -> attribute_error ())); + | "MUTIND" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "noType", noType; "uri", uri] -> + Cic.AMutInd (id, uri_of_string uri, int_of_string noType, []) + | _ -> attribute_error ())); + | "MUTCONSTRUCT" -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "noConstr", noConstr; "noType", noType; "sort", _; + "uri", uri] -> + Cic.AMutConstruct (id, uri_of_string uri, int_of_string noType, + int_of_string noConstr, []) + | _ -> attribute_error ())); + | "FixFunction" -> + let body = pop_cic ctxt in + let typ = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["id", id; "name", name; "recIndex", recIndex] -> + Fix_fun (id, name, int_of_string recIndex, typ, body) + | _ -> attribute_error ()) + | "CofixFunction" -> + let body = pop_cic ctxt in + let typ = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["id", id; "name", name] -> + Cofix_fun (id, name, typ, body) + | _ -> attribute_error ()) + | "FIX" -> + let fix_funs = pop_fix_funs ctxt in + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "noFun", noFun; "sort", _] -> + Cic.AFix (id, int_of_string noFun, fix_funs) + | _ -> attribute_error ())) + | "COFIX" -> + let cofix_funs = pop_cofix_funs ctxt in + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "noFun", noFun; "sort", _] -> + Cic.ACoFix (id, int_of_string noFun, cofix_funs) + | _ -> attribute_error ())) + | "MUTCASE" -> + (match pop_cics ctxt with + | patternsType :: inductiveTerm :: patterns -> + push ctxt (Cic_term + (match pop_tag_attrs ctxt with + | ["id", id; "noType", noType; "sort", _; "uriType", uriType] -> + Cic.AMutCase (id, uri_of_string uriType, int_of_string noType, + patternsType, inductiveTerm, patterns) + | _ -> attribute_error ())) + | _ -> parse_error "invalid \"MUTCASE\" content") + | "Constructor" -> + let typ = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["name", name] -> Constructor (name, typ) + | _ -> attribute_error ()) + | "InductiveType" -> + let constructors = pop_constructors ctxt in + let arity = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["id", id; "inductive", inductive; "name", name] -> + Inductive_type (id, name, bool_of_string inductive, arity, + constructors) + | _ -> attribute_error ()) + | "InductiveDefinition" -> + let inductive_types = pop_inductive_types ctxt in + let obj_attributes = pop_obj_attributes ctxt in + push ctxt (Cic_obj + (match pop_tag_attrs ctxt with + | ["id", id; "noParams", noParams; "params", params] -> + Cic.AInductiveDefinition (id, inductive_types, + uri_list_of_string params, int_of_string noParams, obj_attributes) + | _ -> attribute_error ())) + | "ConstantType" -> + let typ = pop_cic ctxt in + let obj_attributes = pop_obj_attributes ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["id", id; "name", name; "params", params] -> + Cic_constant_type (id, name, uri_list_of_string params, typ, + obj_attributes) + | _ -> attribute_error ()) + | "ConstantBody" -> + let body = pop_cic ctxt in + let obj_attributes = pop_obj_attributes ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["for", for_; "id", id; "params", params] -> + Cic_constant_body (id, for_, uri_list_of_string params, body, + obj_attributes) + | _ -> attribute_error ()) + | "Variable" -> + let typ = pop_cic ctxt in + let body = + match pop_cics ctxt with + | [] -> None + | [t] -> Some t + | _ -> parse_error "wrong content for \"Variable\"" + in + let obj_attributes = pop_obj_attributes ctxt in + push ctxt (Cic_obj + (match pop_tag_attrs ctxt with + | ["id", id; "name", name; "params", params] -> + Cic.AVariable (id, name, body, typ, uri_list_of_string params, + obj_attributes) + | _ -> attribute_error ())) + | "arg" -> + let term = pop_cic ctxt in + push ctxt + (match pop_tag_attrs ctxt with + | ["relUri", relUri] -> Arg (relUri, term) + | _ -> attribute_error ()) + | "instantiate" -> + (* explicit named substitution handling: when the end tag of an element + * subject of exlicit named subst (MUTIND, MUTCONSTRUCT, CONST, VAR) it + * is stored on the stack with no substitutions (i.e. []). When the end + * tag of an "instantiate" element is found we patch the term currently + * on the stack with the substitution built from "instantiate" children + *) + (* XXX inefficiency here: first travels the elements in order to + * find the baseUri, then in order to build the explicit named subst *) + let base_uri = find_base_uri ctxt in + let subst = pop_subst ctxt base_uri in + let term = pop_cic ctxt in + (* comment from CicParser3.ml: + * CSC: the "id" optional attribute should be parsed and reflected in + * Cic.annterm and id = string_of_xml_attr (n#attribute "id") *) + (* replace *) + set_top ctxt (Cic_term (patch_subst ctxt subst term)) + | "attributes" -> + let (_, attrs) = pop_tag ctxt in + let rec mk_obj_attributes acc = function + | [] -> acc + | ("generated", "true") :: tl -> + mk_obj_attributes (`Generated :: acc) tl + | ("class", "coercion") :: tl -> + mk_obj_attributes (`Class `Coercion :: acc) tl + | ("class", "record") :: tl -> + mk_obj_attributes (`Class `Record :: acc) tl + | ("class", "projection") :: tl -> + mk_obj_attributes (`Class `Projection :: acc) tl + | ("class", "elimProp") :: tl -> + mk_obj_attributes (`Class (`Elim Cic.Prop) :: acc) tl + | ("class", "elimSet") :: tl -> + mk_obj_attributes (`Class (`Elim Cic.Set) :: acc) tl + | ("class", "elimType") :: tl -> + let univ = CicUniv.fresh ~uri:ctxt.uri () in + mk_obj_attributes (`Class (`Elim (Cic.Type univ)) :: acc) tl + | _ -> attribute_error () + in + push ctxt (Cic_attributes (mk_obj_attributes [] attrs)) + | tag -> + match find_helm_exception ctxt with + | Some (exn, arg) -> raise (Getter_failure (exn, arg)) + | None -> parse_error (sprintf "unknown element \"%s\"" tag) + +(** {2 Parser internals} *) + +let parse uri filename = + let ctxt = new_parser_context uri in + ctxt.filename <- filename; + let module P = XmlPushParser in + let callbacks = { + P.default_callbacks with + P.start_element = Some (start_element ctxt); + P.end_element = Some (end_element ctxt); + } in + let xml_parser = P.create_parser callbacks in + ctxt.xml_parser <- Some xml_parser; + try + P.parse xml_parser (`File filename); +(* debug_print (string_of_stack stack);*) + List.hd ctxt.stack + with + | Failure "int_of_string" -> parse_error ctxt "integer number expected" + | Invalid_argument "bool_of_string" -> parse_error ctxt "boolean expected" + | Parser_failure _ + | Getter_failure _ as exn -> + raise exn + | exn -> + raise (Parser_failure ("uncaught exception: " ^ Printexc.to_string exn)) + +(** {2 API implementation} *) + +let annobj_of_xml uri filename filenamebody = + match filenamebody with + | None -> + (match parse uri filename with + | Cic_constant_type (id, name, params, typ, obj_attributes) -> + Cic.AConstant (id, None, name, None, typ, params, obj_attributes) + | Cic_obj obj -> obj + | _ -> raise (Parser_failure ("no object found in " ^ filename))) + | Some filenamebody -> + (match parse uri filename, parse uri filenamebody with + | Cic_constant_type (type_id, name, params, typ, obj_attributes), + Cic_constant_body (body_id, _, _, body, _) -> + Cic.AConstant (type_id, Some body_id, name, Some body, typ, params,[]) + | _ -> + raise (Parser_failure (sprintf "no constant found in %s, %s" + filename filenamebody))) + +let obj_of_xml uri filename filenamebody = + Deannotate.deannotate_obj (annobj_of_xml uri filename filenamebody) + +end + diff --git a/helm/ocaml/cic/cicPushParser.mli b/helm/ocaml/cic/cicPushParser.mli new file mode 100644 index 000000000..d08913321 --- /dev/null +++ b/helm/ocaml/cic/cicPushParser.mli @@ -0,0 +1,48 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +module CicParser: +sig + (** raised for exception received by the getter (i.e. embedded in the source + * XML document). Arguments are values of "helm:exception" and + * "helm:exception_arg" attributes *) + exception Getter_failure of string * string + + (** generic parser failure *) + exception Parser_failure of string + + (* given the filename of an xml file of a cic object, it returns its + * internal annotated representation. In the case of constants (whose type + * is splitted from the body), a second xml file (for the body) must be + * provided. *) + val annobj_of_xml: UriManager.uri -> string -> string option -> Cic.annobj + + (* given the filename of an xml file of a cic object, it returns its + * internal logical representation. In the case of constants (whose type is + * splitted from the body), a second xml file (for the body) must be + * provided. *) + val obj_of_xml : UriManager.uri -> string -> string option -> Cic.obj +end + diff --git a/helm/ocaml/cic/cicPxpParser.ml b/helm/ocaml/cic/cicPxpParser.ml new file mode 100644 index 000000000..8fdefc250 --- /dev/null +++ b/helm/ocaml/cic/cicPxpParser.ml @@ -0,0 +1,103 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +(******************************************************************************) +(* *) +(* PROJECT HELM *) +(* *) +(* Claudio Sacerdoti Coen *) +(* 24/01/2000 *) +(* *) +(* This is the main (top level) module of a parser for cic objects from xml *) +(* files to the internal representation. It uses the modules cicParser2 *) +(* (objects level) and cicParser3 (terms level) *) +(* *) +(******************************************************************************) + +module CicParser = +struct + +exception Getter_failure of string * string +exception Parser_failure of string + + (** tries to recover from a parse error caused by the parsing of a getter + * error message (e.g. Key_not_found exception). Unfortunately we have to + * re-parse xml document to extract exception data *) +let try_recover exn filename = + let rc = ref None in + (try + let entity_manager = + Pxp_ev_parser.create_entity_manager ~is_document:true + PxpHelmConf.pxp_config (Pxp_types.from_file filename) + in + let pull_parser = + Pxp_ev_parser.create_pull_parser PxpHelmConf.pxp_config + (`Entry_document []) entity_manager + in + let rec find_exn p = + match p () with + | None -> () + | Some (Pxp_types.E_start_tag ("html", attrs, _, _)) -> + let exn = List.assoc "helm:exception" attrs in + let arg = + try List.assoc "helm:exception_arg" attrs with Not_found -> "" + in + rc := Some (Getter_failure (exn, arg)) + | _ -> find_exn p + in + find_exn pull_parser + with _ -> raise (Parser_failure (Printexc.to_string exn))); + match !rc with + | None -> raise (Parser_failure (Printexc.to_string exn)) + | Some exn -> raise exn + +let parse_document filename = + try + Pxp_tree_parser.parse_document_entity PxpHelmConf.pxp_config + (Pxp_types.from_file ~alt:[PxpUrlResolver.url_resolver] filename) + CicParser3.domspec + with exn -> + raise (try_recover exn filename) + +(* given the filename of an xml file of a cic object it returns its internal *) +(* representation. *) +let annobj_of_xml uri filename filenamebody = + CicParser3.set_uri uri; + let root, rootbody = + let doc = parse_document filename in + let docroot = doc#root in + match filenamebody with + None -> docroot,None + | Some filename -> + let docbody = parse_document filename in + docroot,Some docbody#root + in + CicParser2.get_term root rootbody + +let obj_of_xml uri filename filenamebody = + Deannotate.deannotate_obj (annobj_of_xml uri filename filenamebody) + +end + diff --git a/helm/ocaml/cic/cicPxpParser.mli b/helm/ocaml/cic/cicPxpParser.mli new file mode 100644 index 000000000..a1915ec47 --- /dev/null +++ b/helm/ocaml/cic/cicPxpParser.mli @@ -0,0 +1,61 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +(******************************************************************************) +(* *) +(* PROJECT HELM *) +(* *) +(* Claudio Sacerdoti Coen *) +(* 24/01/2000 *) +(* *) +(* This is the main (top level) module of a parser for cic objects from xml *) +(* files to the internal representation. It uses the modules cicParser2 *) +(* (objects level) and cicParser3 (terms level) *) +(* *) +(******************************************************************************) + +module CicParser: +sig + (** raised for exception received by the getter (i.e. embedded in the source + * XML document). Arguments are values of "helm:exception" and + * "helm:exception_arg" attributes *) + exception Getter_failure of string * string + + (** generic parser failure *) + exception Parser_failure of string + + (* given the filename of an xml file of a cic object, it returns its + * internal annotated representation. In the case of constants (whose type + * is splitted from the body), a second xml file (for the body) must be + * provided. *) + val annobj_of_xml: UriManager.uri -> string -> string option -> Cic.annobj + + (* given the filename of an xml file of a cic object, it returns its + * internal logical representation. In the case of constants (whose type is + * splitted from the body), a second xml file (for the body) must be + * provided. *) + val obj_of_xml : UriManager.uri -> string -> string option -> Cic.obj +end + diff --git a/helm/ocaml/cic/test.ml b/helm/ocaml/cic/test.ml new file mode 100644 index 000000000..c92e1d498 --- /dev/null +++ b/helm/ocaml/cic/test.ml @@ -0,0 +1,126 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +open Printf + +let _ = + Helm_registry.set "getter.mode" "remote"; + Helm_registry.set "getter.url" "http://mowgli.cs.unibo.it:58081/" + +let body_RE = Str.regexp "^.*\\.body$" +let con_RE = Str.regexp "^.*\\.con$" + +let unlink f = + if Sys.file_exists f then + Unix.unlink f + +let rec parse uri tmpfile1 tmpfile2 = +(* prerr_endline (sprintf "%s %s" tmpfile1 (match tmpfile2 with None -> "None" | Some f -> "Some " ^ f));*) + (try + let uri' = UriManager.uri_of_string uri in + let time_new0 = Unix.gettimeofday () in +(* let obj_new = CicPushParser.CicParser.annobj_of_xml tmpfile1 tmpfile2 in*) + let obj_new = CicParser.annobj_of_xml uri' tmpfile1 tmpfile2 in + let time_new1 = Unix.gettimeofday () in + + let time_old0 = Unix.gettimeofday () in + let obj_old = CicPxpParser.CicParser.annobj_of_xml uri' tmpfile1 tmpfile2 in + let time_old1 = Unix.gettimeofday () in + + let time_old = time_old1 -. time_old0 in + let time_new = time_new1 -. time_new0 in + let are_equal = (obj_old = obj_new) in + printf "%s\t%b\t%f\t%f\t%f\n" + uri are_equal time_old time_new (time_new /. time_old *. 100.); + flush stdout; + with + | CicParser.Getter_failure ("key_not_found", uri) + when Str.string_match body_RE uri 0 -> + parse uri tmpfile1 None + | CicParser.Parser_failure msg -> + printf "%s FAILED (%s)\n" uri msg; flush stdout); + unlink tmpfile1; + (match tmpfile2 with None -> () | Some f -> unlink f) + +let _ = + try + while true do + let uri = input_line stdin in + let tmpfile1 = Http_getter.getxml uri in + let tmpfile2 = + if Str.string_match con_RE uri 0 then begin + Some (Http_getter.getxml (uri ^ ".body")) + end else + None + in + parse uri tmpfile1 tmpfile2 + done + with End_of_file -> () + +(* Parsing test: + * - XmlPushParser version *) +(*open Printf*) +(*open XmlPushParser*) + +(*let print s = print_endline s; flush stdout*) + +(*let callbacks =*) +(* { default_callbacks with*) +(* start_element =*) +(* Some (fun tag attrs ->*) +(* let length = List.length attrs in*) +(* print (sprintf "opening %s [%s]"*) +(* tag (String.concat ";" (List.map fst attrs))));*) +(* end_element = Some (fun tag -> print ("closing " ^ tag));*) +(* character_data = Some (fun data -> print "character data ...");*) +(* }*) + +(*let xml_parser = create_parser callbacks*) + +(*let _ = parse xml_parser (`File Sys.argv.(1))*) + +(* Parsing test: + * - Pure expat version (without XmlPushParser mediation). + * Originally written only to test if XmlPushParser mediation caused overhead. + * That was not the case. *) + +(*let _ =*) +(* let ic = open_in Sys.argv.(1) in*) +(* let expat_parser = Expat.parser_create ~encoding:None in*) +(* Expat.set_start_element_handler expat_parser*) +(* (fun tag attrs ->*) +(* let length = List.length attrs in*) +(* print (sprintf "opening %s [%d attribute%s]"*) +(* tag length (if length = 1 then "" else "s")));*) +(* Expat.set_end_element_handler expat_parser*) +(* (fun tag -> print ("closing " ^ tag));*) +(* Expat.set_character_data_handler expat_parser*) +(* (fun data -> print "character data ...");*) +(* try*) +(* while true do*) +(* Expat.parse expat_parser (input_line ic ^ "\n")*) +(* done*) +(* with End_of_file -> Expat.final expat_parser*) + diff --git a/helm/ocaml/cic/xmlPushParser.ml b/helm/ocaml/cic/xmlPushParser.ml new file mode 100644 index 000000000..92e9f0dd6 --- /dev/null +++ b/helm/ocaml/cic/xmlPushParser.ml @@ -0,0 +1,91 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +type callbacks = { + start_element: (string -> (string * string) list -> unit) option; + end_element: (string -> unit) option; + character_data: (string -> unit) option; + processing_instruction: (string -> string -> unit) option; + comment: (string -> unit) option; +} + +let default_callbacks = { + start_element = None; + end_element = None; + character_data = None; + processing_instruction = None; + comment = None; +} + +type xml_source = + [ `Channel of in_channel + | `File of string + | `String of string + ] + +type position = int * int + +type xml_parser = Expat.expat_parser + +let create_parser callbacks = + let expat_parser = Expat.parser_create ~encoding:None in + (match callbacks.start_element with + | Some f -> Expat.set_start_element_handler expat_parser f + | _ -> ()); + (match callbacks.end_element with + | Some f -> Expat.set_end_element_handler expat_parser f + | _ -> ()); + (match callbacks.character_data with + | Some f -> Expat.set_character_data_handler expat_parser f + | _ -> ()); + (match callbacks.processing_instruction with + | Some f -> Expat.set_processing_instruction_handler expat_parser f + | _ -> ()); + (match callbacks.comment with + | Some f -> Expat.set_comment_handler expat_parser f + | _ -> ()); + expat_parser + +let final = Expat.final + +let get_position expat_parser = + (Expat.get_current_line_number expat_parser, + Expat.get_current_column_number expat_parser) + +let parse expat_parser = + let parse_fun = Expat.parse expat_parser in + let rec aux = function + | `Channel ic -> + (try + while true do parse_fun (input_line ic ^ "\n") done + with End_of_file -> final expat_parser) + | `File fname -> + let ic = open_in fname in + aux (`Channel ic); + close_in ic + | `String s -> parse_fun s + in + aux + diff --git a/helm/ocaml/cic/xmlPushParser.mli b/helm/ocaml/cic/xmlPushParser.mli new file mode 100644 index 000000000..49a87425c --- /dev/null +++ b/helm/ocaml/cic/xmlPushParser.mli @@ -0,0 +1,69 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +(** {2 XLM push parser generic interface} + * Do not depend on CIC *) + + (** callbacks needed to instantiate a parser *) +type callbacks = { + start_element: + (string -> (string * string) list -> unit) option; (* tag, attr list *) + end_element: (string -> unit) option; (* tag *) + character_data: (string -> unit) option; (* data *) + processing_instruction: + (string -> string -> unit) option; (* target, value *) + comment: (string -> unit) option; (* value *) +} + + (** do nothing callbacks (all set to None) *) +val default_callbacks: callbacks + + (** source from which parse an XML file *) +type xml_source = + [ `Channel of in_channel + | `File of string + | `String of string + ] + + (** source position in a XML source. + * A position is a pair *) +type position = int * int + +type xml_parser + + (** Create a push parser which invokes the given callbacks *) +val create_parser: callbacks -> xml_parser + + (** Parse XML data from a given source with a given parser *) +val parse: xml_parser -> xml_source -> unit + + (** Inform the farser that parsing is completed, needed only when source is + * `String, for other sources it is automatically invoked when the end of file + * is reached *) +val final: xml_parser -> unit + + (** @return current pair *) +val get_position: xml_parser -> position + -- 2.39.2