(* $Id$
 * ----------------------------------------------------------------------
 * Lexical scanner for HTML text (ocamllex source).
 *
 * Origin: helm/DEVEL/pxp/netstring/nethtml_scanner.mll in helm.git,
 * blob 03e6dea0e2cf31f196785c5bfd7e31bb96c4d677.
 *
 * The scanner has one entry point per lexical context:
 *
 *   scan_document   text between tags; recognizes the openers of
 *                   comments, doctype declarations and elements
 *   scan_special    text in which only an end tag is recognized
 *   scan_comment    inside a comment, after Lcomment
 *   scan_doctype    inside a doctype declaration, after Ldoctype
 *   scan_element    inside a tag, after Lelement or Lelementend
 *
 * The scanner itself keeps no state. The caller decides which entry
 * point to invoke next, presumably based on the token just returned.
 *)

{
  (* Tokens produced by the entry points below. The L.../R.../M...
   * prefixes mark the left delimiter, the right delimiter and the
   * material in between; M tokens carry no text.
   *)
  type token =
      Lcomment                (* "<!--" *)
    | Rcomment                (* "-->" *)
    | Mcomment                (* a piece of comment text *)
    | Ldoctype                (* "<!" *)
    | Rdoctype                (* ">" ending a doctype declaration *)
    | Mdoctype                (* a piece of doctype text *)
    | Lelement of string      (* "<" name; carries the name *)
    | Lelementend of string   (* "</" name; carries the name *)
    | Relement                (* ">" ending a tag *)
    | Cdata of string         (* character data, taken literally *)
    | Space of int            (* white space in a tag; carries its length *)
    | Name of string          (* a name inside a tag *)
    | Is                      (* "=" *)
    | Literal of string       (* quoted string; carries the text without quotes *)
    | Other                   (* any other single character inside a tag *)
    | Eof
}

(* Simplified rules: Only Latin-1 is recognized as character set *)

let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
let extender = '\183'
let digit = ['0'-'9']
let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
let name = ( letter | '_' | ':' ) namechar*
let nmtoken = namechar+
let ws = [ ' ' '\t' '\r' '\n' ]
let string_literal1 = '"' [^ '"' '>' '<' '\n']* '"'
let string_literal2 = "'" [^ '\'' '>' '<' '\n']* "'"


(* The following rules reflect HTML as it is used, not the SGML
 * rules.
 *)

(* Scans ordinary document text. Every pattern consumes at least one
 * character, so a caller looping on this entry point always makes
 * progress until Eof. Where several patterns match, ocamllex picks the
 * longest one, so "<!--" wins over "<!" and "</" name wins over "<".
 *)
rule scan_document = parse
  | "<!--"
      { Lcomment }
  | "<!"
      { Ldoctype }
  | "<" name
      { let s = Lexing.lexeme lexbuf in
        (* drop the leading "<" *)
        Lelement (String.sub s 1 (String.length s - 1))
      }
  | "</" name
      { let s = Lexing.lexeme lexbuf in
        (* drop the leading "</" *)
        Lelementend (String.sub s 2 (String.length s - 2))
      }
  | "<"                   (* misplaced "<": passed on as character data *)
      { Cdata "<" }
  | eof
      { Eof }
  | [^ '<' ]+
      { Cdata (Lexing.lexeme lexbuf) }

(* Scans text in which only an end tag is recognized as markup; every
 * other "<" is character data.
 * NOTE(review): this entry point is reconstructed, not copied. Confirm
 * against the repository revision named in the header, and check which
 * elements the caller scans with it.
 *)
and scan_special = parse
  | "</" name
      { let s = Lexing.lexeme lexbuf in
        Lelementend (String.sub s 2 (String.length s - 2))
      }
  | "<"
      { Cdata "<" }
  | eof
      { Eof }
  | [^ '<' ]+
      { Cdata (Lexing.lexeme lexbuf) }

(* Scans the inside of a comment. A "-" that does not start "-->" is
 * returned as Mcomment of its own, which keeps the terminator
 * recognizable after any run of other characters.
 *)
and scan_comment = parse
  | "-->"
      { Rcomment }
  | "-"
      { Mcomment }
  | eof
      { Eof }
  | [^ '-']+
      { Mcomment }

(* Scans the inside of a doctype declaration up to the first ">". *)
and scan_doctype = parse
  | ">"                   (* Occurrence in strings, and [ ] brackets ignored *)
      { Rdoctype }
  | eof
      { Eof }
  | [^ '>' ]+
      { Mdoctype }

(* Scans the inside of a start or end tag: names, "=", quoted literals
 * and white space. Characters that fit no other pattern are returned
 * one at a time as Other.
 *)
and scan_element = parse
  | ">"
      { Relement }
  | ws+
      { Space (String.length (Lexing.lexeme lexbuf)) }
  | name
      { Name (Lexing.lexeme lexbuf) }
  | "="
      { Is }
  | string_literal1
      { let s = Lexing.lexeme lexbuf in
        (* strip the enclosing double quotes *)
        Literal (String.sub s 1 (String.length s - 2))
      }
  | string_literal2
      { let s = Lexing.lexeme lexbuf in
        (* strip the enclosing single quotes *)
        Literal (String.sub s 1 (String.length s - 2))
      }
  | eof
      { Eof }
  | _
      { Other }

(* ======================================================================
 * History:
 *
 * $Log$
 * Revision 1.1  2000/11/17 09:57:28  lpadovan
 * Initial revision
 *
 * Revision 1.1  2000/03/03 01:07:25  gerd
 * 	Initial revision.
 *
 *
 *)