This commit was manufactured by cvs2svn to create branch 'init'.

[helm.git] / helm / DEVEL / pxp / pxp / pxp_lexer_types.ml
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexer_types.ml b/helm/DEVEL/pxp/pxp/pxp_lexer_types.ml

deleted file mode 100644 (file)

index 988e9d0..0000000
--- a/helm/DEVEL/pxp/pxp/pxp_lexer_types.ml
+++ /dev/null
@@ -1,248 +0,0 @@
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-type lexers =
-    Document
-  | Document_type
-  | Content
-  | Within_tag
-  | Declaration
-  | Content_comment
-  | Decl_comment
-  | Document_comment
-  | Ignored_section
-
-
-type prolog_token =
-    Pro_name of string
-  | Pro_eq                  (* "=" *)
-  | Pro_string of string    (* "..." or '...' *)
-  | Pro_eof
-
-
-type entity_id = < >
-  (* The class without properties; but you can still compare if two objects
-   * are the same.
-   *)
-
-type token = 
-  | Begin_entity             (* Beginning of entity *)
-  | End_entity               (* End of entity *)
-  | Comment_begin            (* <!-- *)
-  | Comment_material of string (* within a comment *)
-  | Comment_end              (* --> *)
-  | Ignore                   (* ignored whitespace *)
-  | Eq                       (* = *)
-  | Rangle                   (* > as tag delimiter *)
-  | Rangle_empty             (* /> as tag delimiter *)
-  | Percent                  (* % followed by space in declaration *)
-  | Plus                     (* + in declaration *)
-  | Star                     (* * in declaration *)
-  | Bar                      (* | in declaration *)
-  | Comma                    (* , in declaration *)
-  | Qmark                    (* ? in declaration *)
-  | Pcdata                   (* #PCDATA in declaration *)
-  | Required                 (* #REQUIRED in declaration *)
-  | Implied                  (* #IMPLIED in declaration *)
-  | Fixed                    (* #FIXED in declaration *)
-  | Bof                      (* A marker for 'beginning of file' *)
-  | Eof                      (* End of file *)
-  | Conditional_begin of entity_id  (* <![ in declaration *)
-  | Conditional_body  of entity_id  (* [ in declaration *)
-  | Conditional_end   of entity_id  (* ]]> in declaration *)
-  | Doctype        of entity_id  (* <!DOCTYPE *)
-  | Doctype_rangle of entity_id  (* > as DOCTYPE delimiter *)
-  | Dtd_begin      of entity_id  (* '[' after DOCTYPE *)
-  | Dtd_end        of entity_id  (* ']' *)
-  | Decl_element   of entity_id  (* <!ELEMENT *)
-  | Decl_attlist   of entity_id  (* <!ATTLIST *)
-  | Decl_entity    of entity_id  (* <!ENTITY *)
-  | Decl_notation  of entity_id  (* <!NOTATION *)
-  | Decl_rangle    of entity_id  (* > *)
-  | Lparen         of entity_id  (* ( in declaration *)
-  | Rparen         of entity_id  (* ) in declaration *)
-  | RparenPlus     of entity_id  (* )+ in declaration *)
-  | RparenStar     of entity_id  (* )* in declaration *)
-  | RparenQmark    of entity_id  (* )? in declaration *)
-      
-  | Tag_beg of (string*entity_id)     (* <name *)
-  | Tag_end of (string*entity_id)     (* </name *)
-
-  | PI        of (string*string)      (* <?name ... ?> *)
-  | PI_xml    of (prolog_token list)  (* <?xml ...?> *)
-  | Cdata     of string               (* <![CDATA[...]]> *)
-  | CRef      of int                  (* &#digits; *)
-  | ERef      of string               (* &name; *)
-  | PERef     of string               (* %name; *)
-  | CharData  of string             (* any characters not otherwise matching *)
-  | LineEnd   of string
-  | Name      of string               (* name *)
-  | Nametoken of string               (* nmtoken but not name *)
-  | Attval    of string           (* attribute value; may contain entity refs *)
-  | Attval_nl_normalized of string
-  | Unparsed_string      of string    (* "data" or 'data' *)
-      
-
-(**********************************************************************)
-(* debugging *)
-
-let string_of_tok tok =
-  match tok with
-    Begin_entity -> "Begin_entity"
-  | End_entity -> "End_entity"
-  | Doctype _ -> "Doctype"
-  | Doctype_rangle _ -> "Doctype_rangle"
-  | Comment_begin -> "Comment_begin"
-  | Comment_end -> "Comment_end"
-  | Comment_material _ -> "Comment_material"
-  | Rangle -> "Rangle"
-  | Rangle_empty -> "Rangle_empty"
-  | Ignore -> "Ignore"
-  | Eq -> "Eq"
-  | Dtd_begin _ -> "Dtd_begin"
-  | Dtd_end _ -> "Dtd_end"
-  | Conditional_begin _ -> "Conditional_begin"
-  | Conditional_body _ -> "Conditional_body"
-  | Conditional_end _ -> "Conditional_end"
-  | Percent -> "Percent"
-  | Lparen _ -> "Lparen"
-  | Rparen _ -> "Rparen"
-  | Plus -> "Plus"
-  | Star -> "Star"
-  | Bar -> "Bar"
-  | Comma -> "Comma"
-  | Qmark -> "Qmark"
-  | Pcdata -> "Pcdata"
-  | Required -> "Required"
-  | Implied -> "Implied"
-  | Fixed -> "Fixed"
-  | Decl_element _ -> "Decl_element"
-  | Decl_attlist _ -> "Decl_attlist"
-  | Decl_entity _ -> "Decl_entity"
-  | Decl_notation _ -> "Decl_notation"
-  | Decl_rangle _ -> "Decl_rangle"
-  | RparenPlus _ -> "RparenPlus"
-  | RparenStar _ -> "RparenStar"
-  | RparenQmark _ -> "RparenQmark"
-  | Bof -> "Bof"
-  | Eof -> "Eof"
-  | PI _ -> "PI"
-  | PI_xml _ -> "PI_xml"
-  | Tag_beg _ -> "Tag_beg"
-  | Tag_end _ -> "Tag_end"
-  | Cdata _ -> "Cdata"
-  | CRef _ -> "CRef"
-  | ERef _ -> "ERef"
-  | PERef _ -> "PERef"
-  | CharData _ -> "CharData"
-  | Name _ -> "Name" 
-  | Nametoken _ -> "Nametoken" 
-  | Attval _ -> "Attval" 
-  | Attval_nl_normalized _ -> "Attval_nl_normalized"
-  | Unparsed_string _ -> "Unparsed_string" 
-  | LineEnd _ -> "LineEnd"
-
-
-type lexer_set =
-    { lex_encoding         : Pxp_types.rep_encoding;
-      scan_document        : Lexing.lexbuf -> (token * lexers);
-      scan_content         : Lexing.lexbuf -> (token * lexers);
-      scan_within_tag      : Lexing.lexbuf -> (token * lexers);
-      scan_document_type   : Lexing.lexbuf -> (token * lexers);
-      scan_declaration     : Lexing.lexbuf -> (token * lexers);
-      scan_content_comment : Lexing.lexbuf -> (token * lexers);
-      scan_decl_comment    : Lexing.lexbuf -> (token * lexers);
-      scan_document_comment: Lexing.lexbuf -> (token * lexers);
-      scan_ignored_section : Lexing.lexbuf -> (token * lexers);
-      scan_xml_pi          : Lexing.lexbuf -> prolog_token;
-      scan_dtd_string      : Lexing.lexbuf -> token;
-      scan_content_string  : Lexing.lexbuf -> token;
-      scan_name_string     : Lexing.lexbuf -> token;
-      scan_only_xml_decl   : Lexing.lexbuf -> token;
-      scan_for_crlf        : Lexing.lexbuf -> token;
-    }
-
-(* ======================================================================
- * History:
- * 
- * $Log$
- * Revision 1.1  2000/11/17 09:57:29  lpadovan
- * Initial revision
- *
- * Revision 1.2  2000/08/18 20:14:31  gerd
- *     Comment -> Comment_begin, Comment_material, Comment_end.
- *
- * Revision 1.1  2000/05/29 23:48:38  gerd
- *     Changed module names:
- *             Markup_aux          into Pxp_aux
- *             Markup_codewriter   into Pxp_codewriter
- *             Markup_document     into Pxp_document
- *             Markup_dtd          into Pxp_dtd
- *             Markup_entity       into Pxp_entity
- *             Markup_lexer_types  into Pxp_lexer_types
- *             Markup_reader       into Pxp_reader
- *             Markup_types        into Pxp_types
- *             Markup_yacc         into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_lexer_types.ml:
- *
- * Revision 1.6  2000/05/29 21:14:57  gerd
- *     Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.5  2000/05/20 20:31:40  gerd
- *     Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.4  2000/05/14 17:45:36  gerd
- *     Bugfix.
- *
- * Revision 1.3  2000/05/14 17:35:12  gerd
- *     Conditional_begin, _end, and _body have an entity_id.
- *
- * Revision 1.2  2000/05/08 21:59:06  gerd
- *     New token Bof (beginning of file).
- *
- * Revision 1.1  2000/05/06 23:21:49  gerd
- *     Initial revision.
- *
- *
- * ======================================================================
- *
- * DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
- *
- * Revision 1.4  2000/04/30 18:19:04  gerd
- *     Added new tokens.
- *
- * Revision 1.3  1999/08/31 19:13:31  gerd
- *     Added checks on proper PE nesting. The idea is that tokens such
- * as Decl_element and Decl_rangle carry an entity ID with them. This ID
- * is simply an object of type < >, i.e. you can only test on identity.
- * The lexer always produces tokens with a dummy ID because it does not
- * know which entity is the current one. The entity layer replaces the dummy
- * ID with the actual ID. The parser checks that the IDs of pairs such as
- * Decl_element and Decl_rangle are the same; otherwise a Validation_error
- * is produced.
- *
- * Revision 1.2  1999/08/10 21:35:08  gerd
- *     The XML/encoding declaration at the beginning of entities is
- * evaluated. In particular, entities have now a method "xml_declaration"
- * which returns the name/value pairs of such a declaration. The "encoding"
- * setting is interpreted by the entity itself; "version", and "standalone"
- * are interpreted by Markup_yacc.parse_document_entity. Other settings
- * are ignored (this does not conform to the standard; the standard prescribes
- * that "version" MUST be given in the declaration of document; "standalone"
- * and "encoding" CAN be declared; no other settings are allowed).
- *     TODO: The user should be warned if the standard is not exactly
- * fulfilled. -- The "standalone" property is not checked yet.
- *
- * Revision 1.1  1999/08/10 00:35:51  gerd
- *     Initial revision.
- *
- * 
- *)