+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-{
- type token =
- Lcomment
- | Rcomment
- | Mcomment
- | Ldoctype
- | Rdoctype
- | Mdoctype
- | Lelement of string
- | Lelementend of string
- | Relement
- | Cdata of string
- | Space of int
- | Name of string
- | Is
- | Literal of string
- | Other
- | Eof
-}
-
-(* Simplified rules: Only Latin-1 is recognized as character set *)
-
-let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
-let extender = '\183'
-let digit = ['0'-'9']
-let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
-let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
-let name = ( letter | '_' | ':' ) namechar*
-let nmtoken = namechar+
-let ws = [ ' ' '\t' '\r' '\n' ]
-let string_literal1 = '"' [^ '"' '>' '<' '\n']* '"'
-let string_literal2 = "'" [^ '\'' '>' '<' '\n']* "'"
-
-
-(* This following rules reflect HTML as it is used, not the SGML
- * rules.
- *)
-
-rule scan_document = parse
- | "<!--"
- { Lcomment }
- | "<!"
- { Ldoctype }
- | "<" name
- { let s = Lexing.lexeme lexbuf in
- Lelement (String.sub s 1 (String.length s - 1))
- }
- | "</" name
- { let s = Lexing.lexeme lexbuf in
- Lelementend (String.sub s 2 (String.length s - 2))
- }
- | "<" (* misplaced "<" *)
- { Cdata "<" }
- | eof
- { Eof }
- | [^ '<' ]+
- { Cdata (Lexing.lexeme lexbuf)}
-
-and scan_special = parse
- | "</" name
- { let s = Lexing.lexeme lexbuf in
- Lelementend (String.sub s 2 (String.length s - 2))
- }
- | "<"
- { Cdata "<" }
- | eof
- { Eof }
- | [^ '<' ]+
- { Cdata (Lexing.lexeme lexbuf)}
-
-
-and scan_comment = parse
- | "-->"
- { Rcomment }
- | "-"
- { Mcomment }
- | eof
- { Eof }
- | [^ '-']+
- { Mcomment }
-
-and scan_doctype = parse
- | ">" (* Occurence in strings, and [ ] brackets ignored *)
- { Rdoctype }
- | eof
- { Eof }
- | [^ '>' ] +
- { Mdoctype }
-
-and scan_element = parse
- | ">"
- { Relement }
- | ws+
- { Space (String.length (Lexing.lexeme lexbuf)) }
- | name
- { Name (Lexing.lexeme lexbuf) }
- | "="
- { Is }
- | string_literal1
- { let s = Lexing.lexeme lexbuf in
- Literal (String.sub s 1 (String.length s - 2))
- }
- | string_literal2
- { let s = Lexing.lexeme lexbuf in
- Literal (String.sub s 1 (String.length s - 2))
- }
- | eof
- { Eof }
- | _
- { Other }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/03/03 01:07:25 gerd
- * Initial revision.
- *
- *
- *)