Initial revision

[helm.git] / helm / DEVEL / pxp / pxp / lexers / pxp_lex_content.src
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src

new file mode 100644 (file)

index 0000000..3df2025
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src
@@ -0,0 +1,107 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_content: lexer rule applied to the content of elements.
+ *
+ * Every action that does not raise evaluates to a pair whose first
+ * component is the recognized token and whose second component is one of
+ * Content, Content_comment or Within_tag (presumably the lexer state to
+ * continue with; confirm in Pxp_lexer_types). The tok_* names are defined
+ * outside this rule and are assumed to be pairs of the same shape.
+ *
+ * Markup that is recognized as ill-formed is reported with WF_error;
+ * input that matches no other case raises Netconversion.Malformed_code.
+ *)
+rule scan_content = parse
+    "<?" pi_string "?>"
+      (* A complete processing instruction: the whole lexeme is handed to
+       * scan_pi together with scan_xml_pi.
+       *)
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
+  | "<?"
+      (* Selected only when the longer, complete form above does not match. *)
+      { raise (WF_error ("Illegal processing instruction")) }
+  | "<!--"
+      { Comment_begin, Content_comment }
+  | '<' '/'? name
+      (* One rule for Tag_beg and Tag_end saves transitions. *)
+      { let s = Lexing.lexeme lexbuf in
+       (* s.[0] is always the left angle bracket, so s.[1] tells an end tag
+        * from a start tag; the name is what follows the 1 or 2 prefix
+        * characters.
+        *)
+       if s.[1] = '/' then
+         Tag_end (String.sub s 2 (String.length s - 2), dummy_entity), 
+         Within_tag 
+       else
+         Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity), 
+         Within_tag 
+      }
+  | "<![CDATA[" cdata_string "]]>"
+      { let s = Lexing.lexeme lexbuf in
+       (* Strip the 9 character opening delimiter and the 3 character
+        * closing delimiter (9 + 3 = 12).
+        *)
+       Cdata (String.sub s 9 (String.length s - 12)), Content }
+  | "<!"
+      { raise (WF_error "Declaration either malformed or not allowed in this context") 
+      }
+  | "<"
+      { raise (WF_error ("The left angle bracket '<' must be written as '&lt;'"))
+      }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       (* The digits sit between the 2 character prefix and the final
+        * semicolon.
+        *)
+       let digits = String.sub s 2 (String.length s - 3) in
+       (* The digit run has no length limit, so int_of_string can fail with
+        * Failure when the number does not fit into an int. Report that as
+        * a well-formedness error like the other malformed input handled in
+        * this rule instead of letting Failure escape from the lexer.
+        *)
+       let code =
+         try int_of_string digits
+         with Failure _ ->
+           raise (WF_error ("Character reference out of range: " ^ s))
+       in
+       CRef code, Content }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       (* The hex digits sit between the 3 character prefix and the final
+        * semicolon; the 0x prefix makes int_of_string read them as
+        * hexadecimal.
+        *)
+       let hexdigits = String.sub s 3 (String.length s - 4) in
+       (* Same overflow protection as in the decimal case.
+        * NOTE(review): a hexadecimal value above max_int that still fits
+        * the machine word presumably does not fail but comes back negative;
+        * such a value is passed on unchanged here - confirm that the
+        * consumer of CRef rejects it.
+        *)
+       let code =
+         try int_of_string ("0x" ^ hexdigits)
+         with Failure _ ->
+           raise (WF_error ("Character reference out of range: " ^ s))
+       in
+       CRef code, Content }
+  | "&" name ";"
+      { let s = Lexing.lexeme lexbuf in
+       (* Drop the leading ampersand and the trailing semicolon. *)
+       ERef (String.sub s 1 (String.length s - 2)), Content }
+  | "&" 
+      { raise (WF_error ("The ampersand '&' must be written as '&amp;'"))
+      }
+
+  (* LineEnd: Depending on whether we are reading from a primary source
+   * (file) or from the replacement text of an internal entity, line endings
+   * must be normalized (converted to \n) or not.
+   * The entity classes do that. The yacc parser will never see LineEnd;
+   * this token is always converted to the appropriate CharData token.
+   *)
+
+  | '\013' '\010'
+      { tok_LineEndCRLF__Content }
+  | '\013'
+      { tok_LineEndCR__Content }
+  | '\010'
+      { tok_LineEndLF__Content }
+  | eof
+      { tok_Eof__Content }
+  | "]]>" 
+      { raise (WF_error ("The sequence ']]>' must be written as ']]&gt;'"))
+      }
+  | "]"
+      (* A single right bracket that does not start the forbidden sequence
+       * handled by the previous case.
+       *)
+      { tok_CharDataRBRACKET__Content }
+  | normal_character+
+      { let s = Lexing.lexeme lexbuf in
+       CharData s, Content 
+      }
+  | _
+      (* Fallback for any input that none of the cases above accepts. *)
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:59  gerd
+ *     Comments return different comment tokens.
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)