2 * ----------------------------------------------------------------------
3 * PXP: The polymorphic XML parser for Objective Caml.
4 * Copyright by Gerd Stolpmann. See LICENSE for details.
22 | Pro_string of string (* "..." or '...' *)
26 (* The class without properties; but you can still compare if two objects
31 | Begin_entity (* Beginning of entity *)
32 | End_entity (* End of entity *)
33 | Comment_begin (* <!-- *)
34 | Comment_material of string (* within a comment *)
35 | Comment_end (* --> *)
36 | Ignore (* ignored whitespace *)
38 | Rangle (* > as tag delimiter *)
39 | Rangle_empty (* /> as tag delimiter *)
40 | Percent (* % followed by space in declaration *)
41 | Plus (* + in declaration *)
42 | Star (* * in declaration *)
43 | Bar (* | in declaration *)
44 | Comma (* , in declaration *)
45 | Qmark (* ? in declaration *)
46 | Pcdata (* #PCDATA in declaration *)
47 | Required (* #REQUIRED in declaration *)
48 | Implied (* #IMPLIED in declaration *)
49 | Fixed (* #FIXED in declaration *)
50 | Bof (* A marker for 'beginning of file' *)
51 | Eof (* End of file *)
52 | Conditional_begin of entity_id (* <![ in declaration *)
53 | Conditional_body of entity_id (* [ in declaration *)
54 | Conditional_end of entity_id (* ]]> in declaration *)
55 | Doctype of entity_id (* <!DOCTYPE *)
56 | Doctype_rangle of entity_id (* > as DOCTYPE delimiter *)
57 | Dtd_begin of entity_id (* '[' after DOCTYPE *)
58 | Dtd_end of entity_id (* ']' *)
59 | Decl_element of entity_id (* <!ELEMENT *)
60 | Decl_attlist of entity_id (* <!ATTLIST *)
61 | Decl_entity of entity_id (* <!ENTITY *)
62 | Decl_notation of entity_id (* <!NOTATION *)
63 | Decl_rangle of entity_id (* > *)
64 | Lparen of entity_id (* ( in declaration *)
65 | Rparen of entity_id (* ) in declaration *)
66 | RparenPlus of entity_id (* )+ in declaration *)
67 | RparenStar of entity_id (* )* in declaration *)
68 | RparenQmark of entity_id (* )? in declaration *)
70 | Tag_beg of (string*entity_id) (* <name *)
71 | Tag_end of (string*entity_id) (* </name *)
73 | PI of (string*string) (* <?name ... ?> *)
74 | PI_xml of (prolog_token list) (* <?xml ...?> *)
75 | Cdata of string (* <![CDATA[...]]> *)
76 | CRef of int (* &#digits; *)
77 | ERef of string (* &name; *)
78 | PERef of string (* %name; *)
79 | CharData of string (* any characters not otherwise matching *)
81 | Name of string (* name *)
82 | Nametoken of string (* nmtoken but not name *)
83 | Attval of string (* attribute value; may contain entity refs *)
84 | Attval_nl_normalized of string
85 | Unparsed_string of string (* "data" or 'data' *)
88 val string_of_tok : token -> string (* maps a token to a string; presumably a printable token name for diagnostics -- TODO confirm against the implementation *)
92 { lex_encoding : Pxp_types.rep_encoding;
93 scan_document : Lexing.lexbuf -> (token * lexers);
94 scan_content : Lexing.lexbuf -> (token * lexers);
95 scan_within_tag : Lexing.lexbuf -> (token * lexers);
96 scan_document_type : Lexing.lexbuf -> (token * lexers);
97 scan_declaration : Lexing.lexbuf -> (token * lexers);
98 scan_content_comment : Lexing.lexbuf -> (token * lexers);
99 scan_decl_comment : Lexing.lexbuf -> (token * lexers);
100 scan_document_comment: Lexing.lexbuf -> (token * lexers);
101 scan_ignored_section : Lexing.lexbuf -> (token * lexers);
102 scan_xml_pi : Lexing.lexbuf -> prolog_token;
103 scan_dtd_string : Lexing.lexbuf -> token;
104 scan_content_string : Lexing.lexbuf -> token;
105 scan_name_string : Lexing.lexbuf -> token;
106 scan_only_xml_decl : Lexing.lexbuf -> token;
107 scan_for_crlf : Lexing.lexbuf -> token;
110 (* lexer_set: Every internal encoding has its own set of lexer functions *)
114 (* ======================================================================
118 * Revision 1.1 2000/11/17 09:57:29 lpadovan
121 * Revision 1.2 2000/08/18 20:14:31 gerd
122 * Comment -> Comment_begin, Comment_material, Comment_end.
124 * Revision 1.1 2000/05/29 23:48:38 gerd
125 * Changed module names:
126 * Markup_aux into Pxp_aux
127 * Markup_codewriter into Pxp_codewriter
128 * Markup_document into Pxp_document
129 * Markup_dtd into Pxp_dtd
130 * Markup_entity into Pxp_entity
131 * Markup_lexer_types into Pxp_lexer_types
132 * Markup_reader into Pxp_reader
133 * Markup_types into Pxp_types
134 * Markup_yacc into Pxp_yacc
135 * See directory "compatibility" for (almost) compatible wrappers emulating
136 * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
138 * ======================================================================
139 * Old logs from markup_lexer_types.mli:
141 * Revision 1.5 2000/05/29 21:14:57 gerd
142 * Changed the type 'encoding' into a polymorphic variant.
144 * Revision 1.4 2000/05/20 20:31:40 gerd
145 * Big change: Added support for various encodings of the
146 * internal representation.
148 * Revision 1.3 2000/05/14 17:35:12 gerd
149 * Conditional_begin, _end, and _body have an entity_id.
151 * Revision 1.2 2000/05/08 21:59:17 gerd
152 * New token Bof (beginning of file).
154 * Revision 1.1 2000/05/06 23:21:49 gerd
158 * ======================================================================
160 * DERIVED FROM REVISION 1.3 of markup_lexer_types_shadow.mli
162 * Revision 1.3 1999/08/31 19:13:31 gerd
163 * Added checks on proper PE nesting. The idea is that tokens such
164 * as Decl_element and Decl_rangle carry an entity ID with them. This ID
165 * is simply an object of type < >, i.e. you can only test on identity.
166 * The lexer always produces tokens with a dummy ID because it does not
167 * know which entity is the current one. The entity layer replaces the dummy
168 * ID with the actual ID. The parser checks that the IDs of pairs such as
169 * Decl_element and Decl_rangle are the same; otherwise a Validation_error
172 * Revision 1.2 1999/08/10 21:35:09 gerd
173 * The XML/encoding declaration at the beginning of entities is
174 * evaluated. In particular, entities now have a method "xml_declaration"
175 * which returns the name/value pairs of such a declaration. The "encoding"
176 * setting is interpreted by the entity itself; "version", and "standalone"
177 * are interpreted by Markup_yacc.parse_document_entity. Other settings
178 * are ignored (this does not conform to the standard; the standard prescribes
179 * that "version" MUST be given in the declaration of the document; "standalone"
180 * and "encoding" CAN be declared; no other settings are allowed).
181 * TODO: The user should be warned if the standard is not exactly
182 * fulfilled. -- The "standalone" property is not checked yet.
184 * Revision 1.1 1999/08/10 00:35:51 gerd