(* $Id$
* ----------------------------------------------------------------------
* Markup! The validating XML parser for Objective Caml.
* Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
*
* THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli.
* It corresponds to revision 1.4 of markup_yacc.mli.
*)
(*$ markup-yacc.mli *)
open Markup_types
open Markup_dtd
open Markup_document
(* Parser configuration of the markup-0.2.10 compatible interface.
* Every field is described by the comment that follows it.
*)
type config =
{ warner : collect_warnings;
(* An object that collects warnings. *)
errors_with_line_numbers : bool;
(* Whether error messages contain line numbers or not. The parser
* is 10 to 20 per cent faster if line numbers are turned off;
* you get only character positions in this case.
*)
processing_instructions_inline : bool;
(* true: turns on a special mode for processing instructions (PIs).
* Normally, you cannot determine the exact location of a PI; you only
* know in which element the PI occurs. The "inline" mode makes it
* possible to find out the exact location: every PI is artificially
* wrapped by a special element with name "-pi". For example, if the
* XML text is <a><?x?><?y?></a>, the parser normally produces only an
* element object for "a", and puts the PIs "x" and "y" into it (without
* order). In inline mode, the object "a" will contain two objects
* with name "-pi", and the first object will contain "x", and the
* second "y".
* Notes:
* (1) The name "-pi" is reserved. You cannot use it for your own
* tags because tag names must not begin with '-'.
* (2) You need not add a declaration for "-pi" to the DTD. These
* elements are handled separately.
* (3) Of course, the "-pi" objects are created from exemplars of
* your DOM map.
*)
virtual_root : bool;
(* true: the topmost element of the XML tree is not the root element,
* but the so-called virtual root. The root element is a child of the
* virtual root. The virtual root is an ordinary element with name
* "-vr".
* The following behaviour changes, too:
* - PIs occurring outside the root element and outside the DTD are
* added to the virtual root instead of the document object
* - If processing_instructions_inline is also turned on, these PIs
* are added inline to the virtual root
* Notes:
* (1) The name "-vr" is reserved. You cannot use it for your own
* tags because tag names must not begin with '-'.
* (2) You need not add a declaration for "-vr" to the DTD. These
* elements are handled separately.
* (3) Of course, the "-vr" objects are created from exemplars of
* your DOM map.
*)
(* The following options are not implemented, or are only for internal
* use.
*)
debugging_mode : bool;
}
(* The places a parser can read its input from. See the note on sources
* below for the capabilities of each constructor.
*)
type source =
Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
| Channel of in_channel
| File of string
| Latin1 of string
| ExtID of (ext_id * Markup_reader.resolver)
(* Note on sources:
*
* The sources do not all have the same capabilities. Here are the
* differences:
*
* - File: A File source reads from a file by name. This has the advantage
* that references to external entities can be resolved. The problem
* with SYSTEM references is that they usually contain relative file
* names; more exactly, file names relative to the document containing them.
* It is only possible to convert such names to absolute file names if the
* name of the document containing such references is known; and File
* denotes this name.
*
* - Channel, Latin1: These sources read from documents given as channels or
* (Latin 1-encoded) strings. There is no file name, and because of this
* the documents must not contain references to external files (even
* if the file names are given as absolute names).
*
* - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
* entity to read from is passed to the resolver r as-is.
* The intention of this option is to allow customized
* resolvers to interpret external identifiers without any restriction.
* For example, you can assign the PUBLIC identifiers a meaning (they
* currently do not have any), or you can extend the "namespace" of
* identifiers.
* ExtID is the interface of choice for writing your own extensions to
* resolvers.
*
* - Entity(m,r): You can implement every behaviour by using a customized
* entity class. Once the DTD object d that will be used during parsing
* is known, the entity e = m d is determined and used together with the
* resolver r.
* This is only for hackers.
*)
type 'ext domspec =
{ map : (node_type, 'ext node) Hashtbl.t;
default_element : 'ext node;
}
(* Specifies which node to use as exemplar for which node type: "map"
* associates node types with exemplar nodes. "default_element" is
* presumably the exemplar used for element types that have no entry in
* "map" (NOTE(review): confirm against the implementation). See the
* manual for explanations.
*)
val default_config : config
(* The default configuration. It is documented as follows:
* - The resolver is able to read from files by name
* - Warnings are thrown away
* - Error messages will contain line numbers
* - The internal encoding is ISO-8859-1
* - standalone declaration is checked
*
* NOTE(review): the config type declared in this compatibility interface
* has no resolver, encoding, or standalone component, so the first,
* fourth and fifth items appear to describe the underlying parser rather
* than fields of this record. The defaults of
* processing_instructions_inline, virtual_root and debugging_mode are
* not stated here; check the implementation before relying on them.
*)
val default_extension : ('a node extension) as 'a
(* A "null" extension; an extension that does not extend the
* functionality.
*)
val default_dom : ('a node extension as 'a) domspec
(* A domspec specifying that you do not want to use extensions. *)
val parse_dtd_entity : config -> source -> dtd
(* Parse an entity containing a DTD, and return this DTD. The entity is
* read from the given source using the given configuration.
*)
val parse_document_entity : config -> source -> 'ext domspec -> 'ext document
(* Parse a closed document, i.e. a document beginning with <!DOCTYPE ...>,
* and validate the contents of the document against the DTD contained
* and/or referenced in the document. The nodes of the returned document
* are created from the exemplars of the passed domspec.
*)
val parse_content_entity : config ->
source ->
dtd ->
'ext domspec ->
'ext node
(* Parse a file representing a well-formed fragment of a document. The
* fragment must be a single element (i.e. something like <a>...</a>;
* not a sequence like <a>...</a><b>...</b>). The element is validated
* against the passed DTD, but it is not checked whether the element is
* the root element specified in the DTD.
* Note that you can create DTDs that specify not to validate at all
* (invoke method allow_arbitrary on the DTD).
*)
val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document
(* Parse a closed document (see parse_document_entity), but do not
* validate it. Only the well-formedness checks are performed.
*)
(*$-*)
(* ======================================================================
* History:
*
* $Log$
* Revision 1.1 2000/11/17 09:57:30 lpadovan
* Initial revision
*
* Revision 1.1 2000/05/29 23:43:51 gerd
* Initial compatibility revision.
*
* ======================================================================
* OLD LOGS:
*
* Revision 1.4 2000/05/29 21:14:57 gerd
* Changed the type 'encoding' into a polymorphic variant.
*
* Revision 1.3 2000/05/27 19:24:01 gerd
* New option: recognize_standalone_declaration.
*
* Revision 1.2 2000/05/20 20:31:40 gerd
* Big change: Added support for various encodings of the
* internal representation.
*
* Revision 1.1 2000/05/06 23:21:49 gerd
* Initial revision.
*
* Revision 1.9 2000/04/30 18:23:38 gerd
* New config options 'processing_instructions_inline' and
* 'virtual_root'.
*
* Revision 1.8 2000/03/13 23:46:46 gerd
* Change: The 'resolver' component of the 'config' type has
* disappeared. Instead, there is a new resolver component in the Entity
* and ExtID values of 'source'. I hope that this makes clearer that the
* resolver has only an effect if used together with Entity and ExtID
* sources.
* Change: The Entity value can now return the entity dependent
* on the DTD that is going to be used.
*
* Revision 1.7 2000/02/22 02:32:02 gerd
* Updated.
*
* Revision 1.6 2000/02/22 01:52:45 gerd
* Added documentation.
*
* Revision 1.5 2000/01/20 20:54:43 gerd
* New config.errors_with_line_numbers.
*
* Revision 1.4 1999/09/01 23:09:10 gerd
* New function parse_wf_entity that simulates a well-formedness
* parser.
*
* Revision 1.3 1999/09/01 16:26:36 gerd
* Added an empty line. This is *really* a big change.
*
* Revision 1.2 1999/08/14 22:20:27 gerd
* The "config" slot has now a component "warner"which is
* an object with a "warn" method. This is used to warn about characters
* that cannot be represented in the Latin 1 alphabet.
* Furthermore, there is a new component "debugging_mode".
*
* Revision 1.1 1999/08/10 00:35:52 gerd
* Initial revision.
*
*
*)