2 * ----------------------------------------------------------------------
3 * PXP: The polymorphic XML parser for Objective Caml.
4 * Copyright by Gerd Stolpmann. See LICENSE for details.
9 (**********************************************************************)
12 (* Object model of document type declarations *)
14 (**********************************************************************)
16 (* ======================================================================
19 * class dtd ............... represents the whole DTD, including element
20 * declarations, entity declarations, notation
21 * declarations, and processing instructions
22 * class dtd_element ....... represents an element declaration consisting
23 * of a content model and an attribute list
25 * class dtd_notation ...... represents a notation declaration
26 * class proc_instruction .. represents a processing instruction
27 * ======================================================================
35 * creates a new, empty DTD object without any declaration, without a root
36 * element, without an ID.
38 Pxp_types.collect_warnings ->
39 Pxp_types.rep_encoding ->
41 method root : string option
42 (* get the name of the root element if present *)
44 method set_root : string -> unit
45 (* set the name of the root element. This method can be invoked
49 method id : Pxp_types.dtd_id option
50 (* get the identifier for this DTD *)
52 method set_id : Pxp_types.dtd_id -> unit
53 (* set the identifier. This method can be invoked only once *)
55 method encoding : Pxp_types.rep_encoding
56 (* returns the encoding used for character representation *)
59 method allow_arbitrary : unit
60 (* After this method has been invoked, the object changes its behaviour:
61 * - elements and notations that have not been added may be used in an
62 * arbitrary way; the methods "element" and "notation" indicate this
63 * by raising Undeclared instead of Validation_error.
66 method disallow_arbitrary : unit
68 method arbitrary_allowed : bool
69 (* Returns whether arbitrary contents are allowed or not. *)
71 method standalone_declaration : bool
72 (* Whether there is a 'standalone' declaration or not. Strictly
73 * speaking, this declaration is not part of the DTD, but it is
74 * included here because of practical reasons.
75 * If not set, this property defaults to 'false'.
78 method set_standalone_declaration : bool -> unit
79 (* Sets the 'standalone' declaration. *)
82 method add_element : dtd_element -> unit
83 (* add the given element declaration to this DTD. Raises Not_found
84 * if there is already an element declaration with the same name.
87 method add_gen_entity : Pxp_entity.entity -> bool -> unit
88 (* add_gen_entity e extdecl:
89 * add the entity 'e' as general entity to this DTD (general entities
90 * are those represented by &name;). If there is already a declaration
91 * with the same name, the second definition is ignored; as an exception to
92 * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
93 * may only be redeclared with a definition that is equivalent to the
94 * standard definition; otherwise a Validation_error is raised.
96 * 'extdecl': 'true' indicates that the entity declaration occurs in
97 * an external entity. (Used for the standalone check.)
100 method add_par_entity : Pxp_entity.entity -> unit
101 (* add the given entity as parameter entity to this DTD (parameter
102 * entities are those represented by %name;). If there is already a
103 * declaration with the same name, the second definition is ignored.
106 method add_notation : dtd_notation -> unit
107 (* add the given notation to this DTD. If there is already a declaration
108 * with the same name, a Validation_error is raised.
111 method add_pinstr : proc_instruction -> unit
112 (* add the given processing instruction to this DTD. *)
114 method element : string -> dtd_element
115 (* looks up the element declaration with the given name. Raises
116 * Validation_error if the element cannot be found. (If "allow_arbitrary"
117 * has been invoked before, Undeclared is raised instead.)
120 method element_names : string list
121 (* returns the list of the names of all element declarations. *)
123 method gen_entity : string -> (Pxp_entity.entity * bool)
124 (* let e, extdecl = obj # gen_entity n:
125 * looks up the general entity 'e' with the name 'n'. Raises
126 * WF_error if the entity cannot be found.
127 * 'extdecl': indicates whether the entity declaration occurred in an
131 method gen_entity_names : string list
132 (* returns the list of all general entity names *)
134 method par_entity : string -> Pxp_entity.entity
135 (* looks up the parameter entity with the given name. Raises
136 * WF_error if the entity cannot be found.
139 method par_entity_names : string list
140 (* returns the list of all parameter entity names *)
142 method notation : string -> dtd_notation
143 (* looks up the notation declaration with the given name. Raises
144 * Validation_error if the notation cannot be found. (If "allow_arbitrary"
145 * has been invoked before, Undeclared is raised instead.)
148 method notation_names : string list
149 (* Returns the list of the names of all added notations *)
151 method pinstr : string -> proc_instruction list
152 (* looks up all processing instructions with the given target.
153 * The "target" is the identifier following "<?".
154 * Note: It is not possible to find out the exact position of the
155 * processing instruction.
158 method pinstr_names : string list
159 (* Returns the list of the names (targets) of all added pinstrs *)
161 method validate : unit
162 (* ensures that the DTD is valid. This method is optimized such that
163 * actual validation is only performed if the DTD has changed.
164 * If the DTD is invalid, mostly a Validation_error is raised,
165 * but other exceptions are possible, too.
168 method only_deterministic_models : unit
169 (* Succeeds if all regexp content models are deterministic.
170 * Otherwise Validation_error.
173 method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
174 (* write os enc doctype:
175 * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
176 * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
177 * only the declarations are written (the material within the
181 method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
182 (* DEPRECATED METHOD; included only to keep compatibility with
183 * older versions of the parser
187 (*----------------------------------------*)
188 method invalidate : unit
189 (* INTERNAL METHOD *)
190 method warner : Pxp_types.collect_warnings
191 (* INTERNAL METHOD *)
196 (*$ markup-dtd2.mli *)
198 (* ---------------------------------------------------------------------- *)
200 and dtd_element : dtd -> string ->
202 * new dtd_element init_dtd init_name:
203 * creates a new dtd_element object for init_dtd with init_name.
204 * The strings are represented in the same encoding as init_dtd.
209 (* returns the name of the declared element *)
211 method externally_declared : bool
212 (* returns whether the element declaration occurs in an external
216 method content_model : Pxp_types.content_model_type
217 (* get the content model of this element declaration, or Unspecified *)
219 method content_dfa : Pxp_dfa.dfa_definition option
220 (* return the DFA of the content model if there is a DFA, or None.
221 * A DFA exists only for regexp style content models which are
225 method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
226 (* set_cm_and_extdecl cm extdecl:
227 * set the content model to 'cm'. Once the content model is not
228 * Unspecified, it cannot be set to a different value again.
229 * Furthermore, it is set whether the element occurs in an external
230 * entity ('extdecl').
233 method encoding : Pxp_types.rep_encoding
234 (* Return the encoding of the strings *)
236 method allow_arbitrary : unit
237 (* After this method has been invoked, the object changes its behaviour:
238 * - attributes that have not been added may be used in an
239 * arbitrary way; the method "attribute" indicates this
240 * by raising Undeclared instead of Validation_error.
243 method disallow_arbitrary : unit
245 method arbitrary_allowed : bool
246 (* Returns whether arbitrary attributes are allowed or not. *)
248 method attribute : string ->
249 Pxp_types.att_type * Pxp_types.att_default
250 (* get the type and default value of a declared attribute, or raise
251 * Validation_error if the attribute does not exist.
252 * If 'arbitrary_allowed', the exception Undeclared is raised instead
253 * of Validation_error.
256 method attribute_violates_standalone_declaration :
257 string -> string option -> bool
258 (* attribute_violates_standalone_declaration name v:
259 * Checks whether the attribute 'name' violates the "standalone"
260 * declaration if it has value 'v'.
261 * The method returns true if:
262 * - The attribute declaration occurs in an external entity,
263 * and if one of the two conditions holds:
264 * - v = None, and there is a default for the attribute value
265 * - v = Some s, and the type of the attribute is not CDATA,
266 * and s changes if normalized according to the rules of the
269 * The method raises Validation_error if the attribute does not exist.
270 * If 'arbitrary_allowed', the exception Undeclared is raised instead
271 * of Validation_error.
274 method attribute_names : string list
275 (* get the list of all declared attributes *)
277 method names_of_required_attributes : string list
278 (* get the list of all attributes that are specified as required
282 method id_attribute_name : string option
283 (* Returns the name of the attribute with type ID, or None. *)
285 method idref_attribute_names : string list
286 (* Returns the names of the attributes with type IDREF or IDREFS. *)
288 method add_attribute : string ->
289 Pxp_types.att_type ->
290 Pxp_types.att_default ->
293 (* add_attribute name type default extdecl:
294 * add an attribute declaration for an attribute with the given name,
295 * type, and default value. If there is more than one declaration for
296 * an attribute name, the first declaration counts; the other declarations
298 * 'extdecl': if true, the attribute declaration occurs in an external
299 * entity. This property is used to check the "standalone" attribute.
302 method validate : unit
303 (* checks whether this element declaration (i.e. the content model and
304 * all attribute declarations) is valid for the associated DTD.
305 * Raises mostly Validation_error if the validation fails.
308 method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
309 (* write os enc:
310 * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
313 method write_compact_as_latin1 : Pxp_types.output_stream -> unit
314 (* DEPRECATED METHOD; included only to keep compatibility with
315 * older versions of the parser
319 (* ---------------------------------------------------------------------- *)
321 and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
323 * new dtd_notation a_name an_external_ID init_encoding
324 * creates a new dtd_notation object with the given name and the given
329 method ext_id : Pxp_types.ext_id
330 method encoding : Pxp_types.rep_encoding
332 method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
333 (* write os enc:
334 * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
338 method write_compact_as_latin1 : Pxp_types.output_stream -> unit
339 (* DEPRECATED METHOD; included only to keep compatibility with
340 * older versions of the parser
345 (* ---------------------------------------------------------------------- *)
347 and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
349 * new proc_instruction a_target a_value init_encoding
350 * creates a new proc_instruction object with the given target string and
351 * the given value string; init_encoding is the representation encoding.
352 * Note: A processing instruction is written as <?target value?>.
355 method target : string
356 method value : string
357 method encoding : Pxp_types.rep_encoding
359 method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
361 * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
364 method write_compact_as_latin1 : Pxp_types.output_stream -> unit
365 (* DEPRECATED METHOD; included only to keep compatibility with
366 * older versions of the parser
369 method parse_pxp_option : (string * string * (string * string) list)
370 (* Parses a PI containing a PXP option. Such PIs are formed like:
371 * <?target option-name option-att="value" option-att="value" ... ?>
372 * The method returns a triple
373 * (target, option-name, [option-att, value; ...])
383 (* ======================================================================
387 * Revision 1.1 2000/11/17 09:57:29 lpadovan
390 * Revision 1.8 2000/08/18 21:18:45 gerd
391 * Updated wrong comments for methods par_entity and gen_entity.
392 * These can raise WF_error and not Validation_error, and this is the
395 * Revision 1.7 2000/07/25 00:30:01 gerd
396 * Added support for pxp:dtd PI options.
398 * Revision 1.6 2000/07/23 02:16:33 gerd
401 * Revision 1.5 2000/07/16 16:34:41 gerd
402 * New method 'write', the successor of 'write_compact_as_latin1'.
404 * Revision 1.4 2000/07/14 13:56:49 gerd
405 * Added methods id_attribute_name and idref_attribute_names.
407 * Revision 1.3 2000/07/09 00:13:37 gerd
408 * Added methods gen_entity_names, par_entity_names.
410 * Revision 1.2 2000/06/14 22:19:06 gerd
411 * Added checks such that it is impossible to mix encodings.
413 * Revision 1.1 2000/05/29 23:48:38 gerd
414 * Changed module names:
415 * Markup_aux into Pxp_aux
416 * Markup_codewriter into Pxp_codewriter
417 * Markup_document into Pxp_document
418 * Markup_dtd into Pxp_dtd
419 * Markup_entity into Pxp_entity
420 * Markup_lexer_types into Pxp_lexer_types
421 * Markup_reader into Pxp_reader
422 * Markup_types into Pxp_types
423 * Markup_yacc into Pxp_yacc
424 * See directory "compatibility" for (almost) compatible wrappers emulating
425 * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
427 * ======================================================================
428 * Old logs from markup_dtd.ml:
430 * Revision 1.11 2000/05/29 21:14:57 gerd
431 * Changed the type 'encoding' into a polymorphic variant.
433 * Revision 1.10 2000/05/27 19:20:38 gerd
434 * Changed the interfaces for the standalone check: New
435 * methods: standalone_declaration, set_standalone_declaration,
436 * externally_declared, attribute_violates_standalone_declaration.
437 * The method set_content_model has been renamed to
438 * set_cm_and_extdecl; it now initializes also whether the element
439 * has been declared in an external entity.
440 * Methods add_gen_entity and gen_entity pass an additional
441 * boolean argument containing whether the declaration of the
442 * general entity happened in an external entity.
443 * Method add_attribute expects this argument, too, which
444 * states whether the declaration of the attribute happened in an
447 * Revision 1.9 2000/05/20 20:31:40 gerd
448 * Big change: Added support for various encodings of the
449 * internal representation.
451 * Revision 1.8 2000/05/06 23:10:26 gerd
452 * allow_arbitrary for elements, too.
454 * Revision 1.7 2000/05/01 20:42:52 gerd
455 * New method write_compact_as_latin1.
457 * Revision 1.6 2000/03/11 22:58:15 gerd
458 * Updated to support Markup_codewriter.
460 * Revision 1.5 2000/02/22 02:32:02 gerd
463 * Revision 1.4 1999/11/09 22:15:41 gerd
464 * Added method "arbitrary_allowed".
466 * Revision 1.3 1999/09/01 16:21:56 gerd
467 * "dtd" classes have now an argument that passes a "warner".
469 * Revision 1.2 1999/08/15 02:20:23 gerd
470 * New feature: a DTD can allow arbitrary elements.
472 * Revision 1.1 1999/08/10 00:35:51 gerd