exception Error of int * int * string
let regexp number = xml_digit+
+let regexp utf8_blank = " " | "\n" | "\t" | [160] (* this is a nbsp *)
(* ZACK: breaks unicode's binder followed by an ascii letter without blank *)
(* let regexp ident_letter = xml_letter *)
let regexp meta_anonymous = "$_"
let regexp qstring = '"' [^ '"']* '"'
-let regexp begincomment = "(**" xml_blank
+let regexp begincomment = "(**" utf8_blank
let regexp beginnote = "(*"
let regexp endcomment = "*)"
(* let regexp comment_char = [^'*'] | '*'[^')']
[ ("->", <:unicode<to>>); ("=>", <:unicode<Rightarrow>>);
("<=", <:unicode<leq>>); (">=", <:unicode<geq>>);
("<>", <:unicode<neq>>); (":=", <:unicode<def>>);
+ ("==", <:unicode<equiv>>);
]
-let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ]+
+let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
let regexp uri =
("cic:/" | "theory:/") (* schema *)
let rec level2_meta_token =
lexer
- | xml_blank+ -> level2_meta_token lexbuf
+ | utf8_blank+ -> level2_meta_token lexbuf
| ident ->
let s = Ulexing.utf8_lexeme lexbuf in
begin
and level2_ast_token =
lexer
- | xml_blank+ -> ligatures_token level2_ast_token lexbuf
- | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf)
+ | utf8_blank+ -> ligatures_token level2_ast_token lexbuf
+ | meta ->
+ let s = Ulexing.utf8_lexeme lexbuf in
+ return lexbuf ("META", String.sub s 1 (String.length s - 1))
| implicit -> return lexbuf ("IMPLICIT", "")
| placeholder -> return lexbuf ("PLACEHOLDER", "")
| ident ->
and level1_pattern_token =
lexer
- | xml_blank+ -> ligatures_token level1_pattern_token lexbuf
+ | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
| ident ->
let s = Ulexing.utf8_lexeme lexbuf in