2 * ----------------------------------------------------------------------
11 #insert pxp_lex_aux.src
13 #insert open_pxp_lex_aux_*.src
14 #insert open_pxp_lex_misc_*.src
18 #insert pxp_lex_defs_*.def
(* scan_content: lexer rule applied to element content.
 *
 * The visible actions either build a token paired with a tag such as
 * Content or Content_comment (presumably the lexer state to continue in;
 * confirm against the callers), refer to a predefined tok_* value, or raise
 * WF_error / Netconversion.Malformed_code on input that is not acceptable.
 *
 * NOTE(review): in this copy of the file several pattern lines and some
 * closing braces of this rule are not present, so for a number of actions
 * the pattern they belong to cannot be seen here.  Those lines are left
 * exactly as found.
 *
 * The three well-formedness messages about the left angle bracket, the
 * ampersand and the CDATA terminator name the escape the document author
 * has to write (&lt; &amp; and ]]&gt; respectively).
 *)
20 rule scan_content = parse
22 { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
24 { raise (WF_error ("Illegal processing instruction")) }
26 { Comment_begin, Content_comment }
28 (* One rule for Tag_beg and Tag_end saves transitions. *)
29 { let s = Lexing.lexeme lexbuf in
(* Tag_end drops the first two characters of the lexeme, Tag_beg only the
 * first one; the test choosing between them is not visible in this copy. *)
31 Tag_end (String.sub s 2 (String.length s - 2), dummy_entity),
34 Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity),
37 | "<![CDATA[" cdata_string "]]>"
38 { let s = Lexing.lexeme lexbuf in
(* Strip the 9-character opener and the 3-character terminator. *)
39 Cdata (String.sub s 9 (String.length s - 12)), Content }
41 { raise (WF_error "Declaration either malformed or not allowed in this context")
(* Tell the author which entity reference to use for a literal bracket. *)
44 { raise (WF_error ("The left angle bracket '<' must be written as '&lt;'"))
46 | "&#" ascii_digit+ ";"
47 { let s = Lexing.lexeme lexbuf in
(* Decimal character reference: keep only the digits between the
 * two-character prefix and the trailing semicolon. *)
48 CRef (int_of_string (String.sub s 2 (String.length s - 3))), Content }
49 | "&#x" ascii_hexdigit+ ";"
50 { let s = Lexing.lexeme lexbuf in
(* Hexadecimal character reference: keep only the hex digits and prepend
 * 0x so that int_of_string reads them in base 16. *)
51 CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))), Content }
53 { let s = Lexing.lexeme lexbuf in
(* Drop the first and the last character of the lexeme, leaving the name. *)
54 ERef (String.sub s 1 (String.length s - 2)), Content }
(* Tell the author which entity reference to use for a literal ampersand. *)
56 { raise (WF_error ("The ampersand '&' must be written as '&amp;'"))
59 (* LineEnd: Depending on whether we are reading from a primary source
60 * (file) or from the replacement text of an internal entity, line endings
61 * must be normalized (converted to \n) or not.
62 * The entity classes do that. The yacc parser will never see LineEnd;
63 * this token is always converted to the appropriate CharData token.
67 { tok_LineEndCRLF__Content }
69 { tok_LineEndCR__Content }
71 { tok_LineEndLF__Content }
75 { raise (WF_error ("The sequence ']]>' must be written as ']]&gt;'"))
78 { tok_CharDataRBRACKET__Content }
80 { let s = Lexing.lexeme lexbuf in
84 { raise Netconversion.Malformed_code }
87 (* ======================================================================
91 * Revision 1.1 2000/11/17 09:57:32 lpadovan
94 * Revision 1.4 2000/08/18 20:19:59 gerd
95 * Comments return different comment tokens.
97 * Revision 1.3 2000/08/14 22:18:34 gerd
98 * Bad_character_stream -> Netconversion.Malformed_code
100 * Revision 1.2 2000/05/29 23:53:12 gerd
101 * Updated because Markup_* modules have been renamed to Pxp_*.
103 * Revision 1.1 2000/05/20 20:33:25 gerd