(* $Id$
 * ----------------------------------------------------------------------
 * Markup! The validating XML parser for Objective Caml.
 * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
 *
 * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli.
 * It corresponds to revision 1.4 of markup_yacc.mli.
 *)

(*$ markup-yacc.mli *)

open Markup_types
open Markup_dtd
open Markup_document

(* The options that control how the parser works. *)
type config = {
  warner : collect_warnings;
  (* The object that collects the warnings. *)

  errors_with_line_numbers : bool;
  (* Selects whether error messages contain line numbers. With line
   * numbers turned off the parser is 10 to 20 per cent faster; error
   * messages then contain character positions only.
   *)

  processing_instructions_inline : bool;
  (* true: enables a special mode for processing instructions (PIs).
   * Ordinarily the exact location of a PI cannot be determined; it is
   * only known in which element the PI occurs. In "inline" mode the
   * exact location becomes available because every PI is artificially
   * wrapped by a special element named "-pi". Example: for the XML text
   * <a><?x?><?y?></a> the parser normally produces only one element
   * object for "a" and puts the PIs "x" and "y" into it (without
   * order). In inline mode the object "a" contains two objects named
   * "-pi"; the first of them contains "x" and the second "y".
   * Notes:
   * (1) "-pi" is a reserved name. It cannot be used for your own tags
   *     because tag names must not begin with '-'.
   * (2) No declaration for "-pi" needs to be added to the DTD; these
   *     elements are handled separately.
   * (3) The "-pi" objects are, of course, created from exemplars of
   *     your DOM map.
   *)

  virtual_root : bool;
  (* true: the topmost element of the XML tree is the so-called virtual
   * root instead of the root element; the root element is a son of the
   * virtual root. The virtual root is an ordinary element named "-vr".
   * This setting changes the following behaviour as well:
   * - PIs occurring outside the root element and outside the DTD are
   *   added to the virtual root instead of the document object
   * - If processing_instructions_inline is turned on, too, these PIs
   *   are added inline to the virtual root
   * Notes:
   * (1) "-vr" is a reserved name. It cannot be used for your own tags
   *     because tag names must not begin with '-'.
   * (2) No declaration for "-vr" needs to be added to the DTD; these
   *     elements are handled separately.
   * (3) The "-vr" objects are, of course, created from exemplars of
   *     your DOM map.
   *)

  (* The options below are either not implemented or intended for
   * internal use only.
   *)

  debugging_mode : bool;
}

type source =
  | Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
  | Channel of in_channel
  | File of string
  | Latin1 of string
  | ExtID of (ext_id * Markup_reader.resolver)
(* Note on sources:
 *
 * Not all sources have the same capabilities. The differences are:
 *
 * - File: Reads from a file that is given by name. The advantage is
 *   that references to external entities can be resolved. - The
 *   difficulty with SYSTEM references is that they usually contain
 *   relative file names, or more exactly, file names that are relative
 *   to the document containing them. Such names can only be converted
 *   to absolute file names if the name of the document containing the
 *   references is known, and File denotes this name.
 *
 * - Channel, Latin1: Read from documents that are given as channels or
 *   as (Latin 1-encoded) strings. As there is no file name, the
 *   documents must not contain references to external files (not even
 *   if the file names are given as absolute names).
 *
 * - ExtID(x,r): The identifier x of the entity to read from (either the
 *   SYSTEM or the PUBLIC name) is passed as-is to the resolver r.
 *   This option is intended to let customized resolvers interpret
 *   external identifiers without any restriction. For example, you can
 *   assign a meaning to PUBLIC identifiers (they currently do not have
 *   any), or you can extend the "namespace" of identifiers.
 *   ExtID is the interface of choice for your own extensions to
 *   resolvers.
 *
 * - Entity(m,r): Every behaviour can be implemented by means of a
 *   customized entity class. As soon as the DTD object d that will be
 *   used during parsing is known, the entity e = m d is determined and
 *   used together with the resolver r.
 *   This is only for hackers.
 *)

type 'ext domspec = {
  map : (node_type, 'ext node) Hashtbl.t;
  default_element : 'ext node;
}
(* Determines the node that serves as exemplar for every node type. The
 * manual contains the explanations.
 *)

val default_config : config
(* The default configuration:
 * - The resolver is able to read from files by name
 * - Warnings are thrown away
 * - Error messages will contain line numbers
 * - The internal encoding is ISO-8859-1
 * - The standalone declaration is checked
 *)

val default_extension : ('a node extension) as 'a
(* A "null" extension, i.e. an extension that does not extend the
 * functionality.
 *)

val default_dom : ('a node extension as 'a) domspec
(* The specification to pass if you do not want to use extensions. *)

val parse_dtd_entity : config -> source -> dtd
(* Parses an entity that contains a DTD, and returns this DTD. *)

val parse_document_entity : config -> source -> 'ext domspec -> 'ext document
(* Parses a closed document, i.e. a document beginning with
 * <!DOCTYPE...>, and validates the contents of the document against the
 * DTD that is contained and/or referenced in the document.
 *)

val parse_content_entity :
  config -> source -> dtd -> 'ext domspec -> 'ext node
(* Parses a file that represents a well-formed fragment of a document.
 * The fragment must consist of a single element (i.e. something like
 * <a>...</a>; not a sequence like <a>...</a><b>...</b>). The element is
 * validated against the passed DTD; however, it is not checked whether
 * the element is the root element specified in the DTD.
 * Note that it is possible to create DTDs that specify not to validate
 * at all (invoke the method allow_arbitrary on the DTD).
 *)

val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document
(* Parses a closed document (see parse_document_entity) without
 * validating it. Only the well-formedness checks are performed.
 *)

(*$-*)

(* ======================================================================
 * History:
 *
 * $Log$
 * Revision 1.1 2000/11/17 09:57:30 lpadovan
 * Initial revision
 *
 * Revision 1.1 2000/05/29 23:43:51 gerd
 * Initial compatibility revision.
 *
 * ======================================================================
 * OLD LOGS:
 *
 * Revision 1.4 2000/05/29 21:14:57 gerd
 * Changed the type 'encoding' into a polymorphic variant.
 *
 * Revision 1.3 2000/05/27 19:24:01 gerd
 * New option: recognize_standalone_declaration.
 *
 * Revision 1.2 2000/05/20 20:31:40 gerd
 * Big change: Added support for various encodings of the
 * internal representation.
 *
 * Revision 1.1 2000/05/06 23:21:49 gerd
 * Initial revision.
 *
 * Revision 1.9 2000/04/30 18:23:38 gerd
 * New config options 'processing_instructions_inline' and
 * 'virtual_root'.
 *
 * Revision 1.8 2000/03/13 23:46:46 gerd
 * Change: The 'resolver' component of the 'config' type has
 * disappeared. Instead, there is a new resolver component in the Entity
 * and ExtID values of 'source'. I hope that this makes clearer that the
 * resolver has only an effect if used together with Entity and ExtID
 * sources.
 * Change: The Entity value can now return the entity dependent
 * on the DTD that is going to be used.
 *
 * Revision 1.7 2000/02/22 02:32:02 gerd
 * Updated.
 *
 * Revision 1.6 2000/02/22 01:52:45 gerd
 * Added documentation.
 *
 * Revision 1.5 2000/01/20 20:54:43 gerd
 * New config.errors_with_line_numbers.
 *
 * Revision 1.4 1999/09/01 23:09:10 gerd
 * New function parse_wf_entity that simulates a well-formedness
 * parser.
 *
 * Revision 1.3 1999/09/01 16:26:36 gerd
 * Added an empty line. This is *really* a big change.
 *
 * Revision 1.2 1999/08/14 22:20:27 gerd
 * The "config" slot has now a component "warner"which is
 * an object with a "warn" method. This is used to warn about characters
 * that cannot be represented in the Latin 1 alphabet.
 * Furthermore, there is a new component "debugging_mode".
 *
 * Revision 1.1 1999/08/10 00:35:52 gerd
 * Initial revision.
 *
 *
 *)