X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fpxp%2Fpxp_lexer_types.mli;fp=helm%2FDEVEL%2Fpxp%2Fpxp%2Fpxp_lexer_types.mli;h=9e7c2d8a1bad6d9fef25ae78f0b0e06e76c02590;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli b/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli new file mode 100644 index 000000000..9e7c2d8a1 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli @@ -0,0 +1,188 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * PXP: The polymorphic XML parser for Objective Caml. + * Copyright by Gerd Stolpmann. See LICENSE for details. + *) + +type lexers = + Document + | Document_type + | Content + | Within_tag + | Declaration + | Content_comment + | Decl_comment + | Document_comment + | Ignored_section + + +type prolog_token = + Pro_name of string + | Pro_eq (* "=" *) + | Pro_string of string (* "..." or '...' *) + | Pro_eof + +type entity_id = < > + (* The class without properties; but you can still compare if two objects + * are the same. + *) + +type token = + | Begin_entity (* Beginning of entity *) + | End_entity (* End of entity *) + | Comment_begin (* *) + | Ignore (* ignored whitespace *) + | Eq (* = *) + | Rangle (* > as tag delimiter *) + | Rangle_empty (* /> as tag delimiter *) + | Percent (* % followed by space in declaration *) + | Plus (* + in declaration *) + | Star (* * in declaration *) + | Bar (* | in declaration *) + | Comma (* , in declaration *) + | Qmark (* ? in declaration *) + | Pcdata (* #PCDATA in declaration *) + | Required (* #REQUIRED in declaration *) + | Implied (* #IMPLIED in declaration *) + | Fixed (* #FIXED in declaration *) + | Bof (* A marker for 'beginning of file' *) + | Eof (* End of file *) + | Conditional_begin of entity_id (* in declaration *) + | Doctype of entity_id (* as DOCTYPE delimiter *) + | Dtd_begin of entity_id (* '[' after DOCTYPE *) + | Dtd_end of entity_id (* ']' *) + | Decl_element of entity_id (* *) + | Lparen of entity_id (* ( in declaration *) + | Rparen of entity_id (* ) in declaration *) + | RparenPlus of entity_id (* )+ in declaration *) + | RparenStar of entity_id (* )* in declaration *) + | RparenQmark of entity_id (* )? in declaration *) + + | Tag_beg of (string*entity_id) (* *) + | PI_xml of (prolog_token list) (* *) + | Cdata of string (* *) + | CRef of int (* &#digits; *) + | ERef of string (* &name; *) + | PERef of string (* %name; *) + | CharData of string (* any characters not otherwise matching *) + | LineEnd of string + | Name of string (* name *) + | Nametoken of string (* nmtoken but not name *) + | Attval of string (* attribute value; may contain entity refs *) + | Attval_nl_normalized of string + | Unparsed_string of string (* "data" or 'data' *) + + +val string_of_tok : token -> string + + +type lexer_set = + { lex_encoding : Pxp_types.rep_encoding; + scan_document : Lexing.lexbuf -> (token * lexers); + scan_content : Lexing.lexbuf -> (token * lexers); + scan_within_tag : Lexing.lexbuf -> (token * lexers); + scan_document_type : Lexing.lexbuf -> (token * lexers); + scan_declaration : Lexing.lexbuf -> (token * lexers); + scan_content_comment : Lexing.lexbuf -> (token * lexers); + scan_decl_comment : Lexing.lexbuf -> (token * lexers); + scan_document_comment: Lexing.lexbuf -> (token * lexers); + scan_ignored_section : Lexing.lexbuf -> (token * lexers); + scan_xml_pi : Lexing.lexbuf -> prolog_token; + scan_dtd_string : Lexing.lexbuf -> token; + scan_content_string : Lexing.lexbuf -> token; + scan_name_string : Lexing.lexbuf -> token; + scan_only_xml_decl : Lexing.lexbuf -> token; + scan_for_crlf : Lexing.lexbuf -> token; + } + +(* lexer_set: Every internal encoding has its own set of lexer functions *) + + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:29 lpadovan + * Initial revision + * + * Revision 1.2 2000/08/18 20:14:31 gerd + * Comment -> Comment_begin, Comment_material, Comment_end. + * + * Revision 1.1 2000/05/29 23:48:38 gerd + * Changed module names: + * Markup_aux into Pxp_aux + * Markup_codewriter into Pxp_codewriter + * Markup_document into Pxp_document + * Markup_dtd into Pxp_dtd + * Markup_entity into Pxp_entity + * Markup_lexer_types into Pxp_lexer_types + * Markup_reader into Pxp_reader + * Markup_types into Pxp_types + * Markup_yacc into Pxp_yacc + * See directory "compatibility" for (almost) compatible wrappers emulating + * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc. + * + * ====================================================================== + * Old logs from markup_lexer_types.mli: + * + * Revision 1.5 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.4 2000/05/20 20:31:40 gerd + * Big change: Added support for various encodings of the + * internal representation. + * + * Revision 1.3 2000/05/14 17:35:12 gerd + * Conditional_begin, _end, and _body have an entity_id. + * + * Revision 1.2 2000/05/08 21:59:17 gerd + * New token Bof (beginning of file). + * + * Revision 1.1 2000/05/06 23:21:49 gerd + * Initial revision. + * + * + * ====================================================================== + * + * DERIVED FROM REVISION 1.3 of markup_lexer_types_shadow.mli + * + * Revision 1.3 1999/08/31 19:13:31 gerd + * Added checks on proper PE nesting. The idea is that tokens such + * as Decl_element and Decl_rangle carry an entity ID with them. This ID + * is simply an object of type < >, i.e. you can only test on identity. + * The lexer always produces tokens with a dummy ID because it does not + * know which entity is the current one. The entity layer replaces the dummy + * ID with the actual ID. The parser checks that the IDs of pairs such as + * Decl_element and Decl_rangle are the same; otherwise a Validation_error + * is produced. + * + * Revision 1.2 1999/08/10 21:35:09 gerd + * The XML/encoding declaration at the beginning of entities is + * evaluated. In particular, entities have now a method "xml_declaration" + * which returns the name/value pairs of such a declaration. The "encoding" + * setting is interpreted by the entity itself; "version", and "standalone" + * are interpreted by Markup_yacc.parse_document_entity. Other settings + * are ignored (this does not conform to the standard; the standard prescribes + * that "version" MUST be given in the declaration of document; "standalone" + * and "encoding" CAN be declared; no other settings are allowed). + * TODO: The user should be warned if the standard is not exactly + * fulfilled. -- The "standalone" property is not checked yet. + * + * Revision 1.1 1999/08/10 00:35:51 gerd + * Initial revision. + * + * + *)