--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_content: lexer rule applied to the content of elements.
+ *
+ * Every action evaluates to a pair (token, next lexer state), or raises:
+ *   - WF_error for markup that is recognizably malformed at this point
+ *     (a bad processing instruction, a stray left angle bracket or
+ *     ampersand, an unescaped CDATA terminator, or a numeric character
+ *     reference too large to be represented as an int);
+ *   - Netconversion.Malformed_code for any input no other pattern accepts.
+ *
+ * The tok_* values are defined in the inserted auxiliary sources;
+ * presumably they are preallocated (token, state) pairs -- confirm there.
+ * The order of the patterns matters: ocamllex prefers the longest match
+ * and, among equally long matches, the pattern listed first.
+ *)
+rule scan_content = parse
+    "<?" pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
+  | "<?"
+      (* A PI opener that did not match the complete PI pattern above. *)
+      { raise (WF_error ("Illegal processing instruction")) }
+  | "<!--"
+      { Comment_begin, Content_comment }
+  | '<' '/'? name
+      (* One rule for Tag_beg and Tag_end saves transitions. *)
+      { let s = Lexing.lexeme lexbuf in
+        (* s has at least two characters: the bracket plus a non-empty name
+         * or a slash, so s.[1] is always a valid index.
+         *)
+        if s.[1] = '/' then
+          Tag_end (String.sub s 2 (String.length s - 2), dummy_entity),
+          Within_tag
+        else
+          Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity),
+          Within_tag
+      }
+  | "<![CDATA[" cdata_string "]]>"
+      (* Strip the 9-character opener and the 3-character terminator. *)
+      { let s = Lexing.lexeme lexbuf in
+        Cdata (String.sub s 9 (String.length s - 12)), Content }
+  | "<!"
+      { raise (WF_error "Declaration either malformed or not allowed in this context")
+      }
+  | "<"
+      { raise (WF_error ("The left angle bracket '<' must be written as '&lt;'"))
+      }
+  | "&#" ascii_digit+ ";"
+      (* Decimal character reference: drop the 2-character prefix and the
+       * trailing semicolon. int_of_string fails on values that do not fit
+       * into an int; report that as a well-formedness error instead of
+       * letting a bare Failure escape the lexer.
+       *)
+      { let s = Lexing.lexeme lexbuf in
+        let digits = String.sub s 2 (String.length s - 3) in
+        let code =
+          try int_of_string digits
+          with Failure _ ->
+            raise (WF_error ("Character reference out of range: " ^ s)) in
+        CRef code, Content }
+  | "&#x" ascii_hexdigit+ ";"
+      (* Hexadecimal character reference: drop the 3-character prefix and
+       * the trailing semicolon, then let int_of_string parse it as a
+       * 0x literal. Overflow is handled as in the decimal case.
+       *)
+      { let s = Lexing.lexeme lexbuf in
+        let digits = String.sub s 3 (String.length s - 4) in
+        let code =
+          try int_of_string ("0x" ^ digits)
+          with Failure _ ->
+            raise (WF_error ("Character reference out of range: " ^ s)) in
+        CRef code, Content }
+  | "&" name ";"
+      (* General entity reference: keep only the name. *)
+      { let s = Lexing.lexeme lexbuf in
+        ERef (String.sub s 1 (String.length s - 2)), Content }
+  | "&"
+      { raise (WF_error ("The ampersand '&' must be written as '&amp;'"))
+      }
+
+  (* LineEnd: Depending on whether we are reading from a primary source
+   * (file) or from the replacement text of an internal entity, line endings
+   * must be normalized (converted to \n) or not.
+   * The entity classes do that. The yacc parser will never see LineEnd;
+   * this token is always converted to the appropriate CharData token.
+   *)
+
+  | '\013' '\010'
+      { tok_LineEndCRLF__Content }
+  | '\013'
+      { tok_LineEndCR__Content }
+  | '\010'
+      { tok_LineEndLF__Content }
+  | eof
+      { tok_Eof__Content }
+  | "]]>"
+      { raise (WF_error ("The sequence ']]>' must be written as ']]&gt;'"))
+      }
+  | "]"
+      { tok_CharDataRBRACKET__Content }
+  | normal_character+
+      { let s = Lexing.lexeme lexbuf in
+        CharData s, Content
+      }
+  | _
+      (* Nothing above matched: the input is not validly encoded text. *)
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)