--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(* The lexer modes.  For every constructor there is an equally named
+ * [scan_*] field in the record type [lexer_set], e.g. [Within_tag]
+ * corresponds to [scan_within_tag].
+ *)
+type lexers =
+  | Document
+  | Document_type
+  | Content
+  | Within_tag
+  | Declaration
+  | Content_comment
+  | Decl_comment
+  | Document_comment
+  | Ignored_section
+
+
+(* The tokens of the <?xml ...?> processing instruction.  A list of them
+ * is the argument of the token [PI_xml]; the field [scan_xml_pi] of
+ * [lexer_set] returns a single one per call.
+ *)
+type prolog_token =
+  | Pro_name of string
+  | Pro_eq                  (* "=" *)
+  | Pro_string of string    (* "..." or '...' *)
+  | Pro_eof                 (* NOTE(review): presumably the end of the
+                             * scanned input -- confirm in the lexer *)
+
+
+type entity_id = < >
+ (* The object type without any methods.  A value of this type carries
+ * no information; the only thing that can be done with two such
+ * objects is to test whether they are the same object (identity).
+ *)
+
+(* The tokens.  The [scan_*] functions of [lexer_set] return values of
+ * this type.  Several constructors carry an [entity_id] as argument.
+ *)
+type token =
+  (* Tokens without entity ID *)
+  | Begin_entity                      (* start of an entity *)
+  | End_entity                        (* end of an entity *)
+  | Comment_begin                     (* <!-- *)
+  | Comment_material of string        (* text inside a comment *)
+  | Comment_end                       (* --> *)
+  | Ignore                            (* whitespace that is ignored *)
+  | Eq                                (* = *)
+  | Rangle                            (* > as tag delimiter *)
+  | Rangle_empty                      (* /> as tag delimiter *)
+  | Percent                           (* % followed by space in declaration *)
+  | Plus                              (* + in declaration *)
+  | Star                              (* * in declaration *)
+  | Bar                               (* | in declaration *)
+  | Comma                             (* , in declaration *)
+  | Qmark                             (* ? in declaration *)
+  | Pcdata                            (* #PCDATA in declaration *)
+  | Required                          (* #REQUIRED in declaration *)
+  | Implied                           (* #IMPLIED in declaration *)
+  | Fixed                             (* #FIXED in declaration *)
+  | Bof                               (* marks the 'beginning of file' *)
+  | Eof                               (* end of file *)
+
+  (* Tokens with entity ID *)
+  | Conditional_begin of entity_id    (* <![ in declaration *)
+  | Conditional_body of entity_id     (* [ in declaration *)
+  | Conditional_end of entity_id      (* ]]> in declaration *)
+  | Doctype of entity_id              (* <!DOCTYPE *)
+  | Doctype_rangle of entity_id       (* > as DOCTYPE delimiter *)
+  | Dtd_begin of entity_id            (* '[' after DOCTYPE *)
+  | Dtd_end of entity_id              (* ']' *)
+  | Decl_element of entity_id         (* <!ELEMENT *)
+  | Decl_attlist of entity_id         (* <!ATTLIST *)
+  | Decl_entity of entity_id          (* <!ENTITY *)
+  | Decl_notation of entity_id        (* <!NOTATION *)
+  | Decl_rangle of entity_id          (* > *)
+  | Lparen of entity_id               (* ( in declaration *)
+  | Rparen of entity_id               (* ) in declaration *)
+  | RparenPlus of entity_id           (* )+ in declaration *)
+  | RparenStar of entity_id           (* )* in declaration *)
+  | RparenQmark of entity_id          (* )? in declaration *)
+
+  (* Tags: the argument is a single pair of name and entity ID *)
+  | Tag_beg of (string*entity_id)     (* <name *)
+  | Tag_end of (string*entity_id)     (* </name *)
+
+  (* Remaining tokens *)
+  | PI of (string*string)             (* <?name ... ?> *)
+  | PI_xml of (prolog_token list)     (* <?xml ...?> *)
+  | Cdata of string                   (* <![CDATA[...]]> *)
+  | CRef of int                       (* &#digits; *)
+  | ERef of string                    (* &name; *)
+  | PERef of string                   (* %name; *)
+  | CharData of string                (* characters matching nothing else *)
+  | LineEnd of string                 (* NOTE(review): presumably a line
+                                       * terminator sequence -- confirm *)
+  | Name of string                    (* name *)
+  | Nametoken of string               (* nmtoken but not name *)
+  | Attval of string                  (* attribute value; may contain
+                                       * entity refs *)
+  | Attval_nl_normalized of string    (* NOTE(review): presumably an
+                                       * attribute value with normalized
+                                       * line ends -- confirm *)
+  | Unparsed_string of string         (* "data" or 'data' *)
+
+
+(**********************************************************************)
+(* debugging *)
+
+(* [string_of_tok tok] returns the name of the constructor of [tok] as
+ * string.  Arguments of the constructor are not included in the result.
+ * The cases are listed in the order of the declaration of [token].
+ *)
+let string_of_tok = function
+  (* Tokens without entity ID *)
+  | Begin_entity           -> "Begin_entity"
+  | End_entity             -> "End_entity"
+  | Comment_begin          -> "Comment_begin"
+  | Comment_material _     -> "Comment_material"
+  | Comment_end            -> "Comment_end"
+  | Ignore                 -> "Ignore"
+  | Eq                     -> "Eq"
+  | Rangle                 -> "Rangle"
+  | Rangle_empty           -> "Rangle_empty"
+  | Percent                -> "Percent"
+  | Plus                   -> "Plus"
+  | Star                   -> "Star"
+  | Bar                    -> "Bar"
+  | Comma                  -> "Comma"
+  | Qmark                  -> "Qmark"
+  | Pcdata                 -> "Pcdata"
+  | Required               -> "Required"
+  | Implied                -> "Implied"
+  | Fixed                  -> "Fixed"
+  | Bof                    -> "Bof"
+  | Eof                    -> "Eof"
+  (* Tokens with entity ID *)
+  | Conditional_begin _    -> "Conditional_begin"
+  | Conditional_body _     -> "Conditional_body"
+  | Conditional_end _      -> "Conditional_end"
+  | Doctype _              -> "Doctype"
+  | Doctype_rangle _       -> "Doctype_rangle"
+  | Dtd_begin _            -> "Dtd_begin"
+  | Dtd_end _              -> "Dtd_end"
+  | Decl_element _         -> "Decl_element"
+  | Decl_attlist _         -> "Decl_attlist"
+  | Decl_entity _          -> "Decl_entity"
+  | Decl_notation _        -> "Decl_notation"
+  | Decl_rangle _          -> "Decl_rangle"
+  | Lparen _               -> "Lparen"
+  | Rparen _               -> "Rparen"
+  | RparenPlus _           -> "RparenPlus"
+  | RparenStar _           -> "RparenStar"
+  | RparenQmark _          -> "RparenQmark"
+  (* Tags *)
+  | Tag_beg _              -> "Tag_beg"
+  | Tag_end _              -> "Tag_end"
+  (* Remaining tokens *)
+  | PI _                   -> "PI"
+  | PI_xml _               -> "PI_xml"
+  | Cdata _                -> "Cdata"
+  | CRef _                 -> "CRef"
+  | ERef _                 -> "ERef"
+  | PERef _                -> "PERef"
+  | CharData _             -> "CharData"
+  | LineEnd _              -> "LineEnd"
+  | Name _                 -> "Name"
+  | Nametoken _            -> "Nametoken"
+  | Attval _               -> "Attval"
+  | Attval_nl_normalized _ -> "Attval_nl_normalized"
+  | Unparsed_string _      -> "Unparsed_string"
+
+
+(* A set of scanner functions, all working on a [Lexing.lexbuf].
+ * NOTE(review): [lex_encoding] is presumably the encoding of the internal
+ * representation the scanners of this set are made for -- confirm.
+ *)
+type lexer_set =
+  {
+    lex_encoding          : Pxp_types.rep_encoding;
+
+    (* One scanner for every constructor of [lexers].  The result is a
+     * token paired with a [lexers] value.
+     * NOTE(review): presumably that value selects the scanner to use for
+     * the next token -- confirm against the callers.
+     *)
+    scan_document         : Lexing.lexbuf -> token * lexers;
+    scan_content          : Lexing.lexbuf -> token * lexers;
+    scan_within_tag       : Lexing.lexbuf -> token * lexers;
+    scan_document_type    : Lexing.lexbuf -> token * lexers;
+    scan_declaration      : Lexing.lexbuf -> token * lexers;
+    scan_content_comment  : Lexing.lexbuf -> token * lexers;
+    scan_decl_comment     : Lexing.lexbuf -> token * lexers;
+    scan_document_comment : Lexing.lexbuf -> token * lexers;
+    scan_ignored_section  : Lexing.lexbuf -> token * lexers;
+
+    (* Scanner returning the tokens of the <?xml ...?> instruction *)
+    scan_xml_pi           : Lexing.lexbuf -> prolog_token;
+
+    (* Scanners returning only a token *)
+    scan_dtd_string       : Lexing.lexbuf -> token;
+    scan_content_string   : Lexing.lexbuf -> token;
+    scan_name_string      : Lexing.lexbuf -> token;
+    scan_only_xml_decl    : Lexing.lexbuf -> token;
+    scan_for_crlf         : Lexing.lexbuf -> token;
+  }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:14:31 gerd
+ * Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.ml:
+ *
+ * Revision 1.6 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.5 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.4 2000/05/14 17:45:36 gerd
+ * Bugfix.
+ *
+ * Revision 1.3 2000/05/14 17:35:12 gerd
+ * Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2 2000/05/08 21:59:06 gerd
+ * New token Bof (beginning of file).
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
+ *
+ * Revision 1.4 2000/04/30 18:19:04 gerd
+ * Added new tokens.
+ *
+ * Revision 1.3 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2 1999/08/10 21:35:08 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)