exception Error of int * int * string
let regexp number = xml_digit+
+let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *)
(* ZACK: breaks unicode's binder followed by an ascii letter without blank *)
(* let regexp ident_letter = xml_letter *)
true
with Not_found -> false))
+let regexp we_proved = "we" utf8_blank+ "proved"
+let regexp we_have = "we" utf8_blank+ "have"
+let regexp let_rec = "let" utf8_blank+ "rec"
+let regexp let_corec = "let" utf8_blank+ "corec"
let regexp ident_decoration = '\'' | '?' | '`'
let regexp ident_cont = ident_letter | xml_digit | '_'
let regexp ident = ident_letter ident_cont* ident_decoration*
let regexp meta_anonymous = "$_"
let regexp qstring = '"' [^ '"']* '"'
-let regexp begincomment = "(**" xml_blank
+let regexp begincomment = "(**" utf8_blank
let regexp beginnote = "(*"
let regexp endcomment = "*)"
(* let regexp comment_char = [^'*'] | '*'[^')']
let level2_ast_keywords = Hashtbl.create 23
let _ =
List.iter (fun k -> Hashtbl.add level2_ast_keywords k ())
- [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match";
- "with"; "in"; "and"; "to"; "as"; "on"; "return" ]
+ [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
+ "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
raise (Error (begin_cnum, end_cnum, msg))
let return_with_loc token begin_cnum end_cnum =
- (* TODO handle line/column numbers *)
- let flocation_begin =
- { Lexing.pos_fname = "";
- Lexing.pos_lnum = -1; Lexing.pos_bol = -1;
- Lexing.pos_cnum = begin_cnum }
- in
- let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in
- (token, (flocation_begin, flocation_end))
+ let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in
+ token, flocation
let return lexbuf token =
let begin_cnum, end_cnum = Ulexing.loc lexbuf in
(* let lexbuf = Ulexing.from_utf8_stream stream in *)
(** XXX Obj.magic rationale.
* The problem.
- * camlp4 constraints the tok_func field of Token.glexer to have type:
+ * camlp5 constraints the tok_func field of Token.glexer to have type:
* Stream.t char -> (Stream.t 'te * flocation_function)
* In order to use ulex we have (in theory) to instantiate a new lexbuf each
* time a char Stream.t is passed, destroying the previous lexbuf which may
* have consumed a character from the old stream which is lost forever :-(
* The "solution".
- * Instead of passing to camlp4 a char Stream.t we pass a lexbuf, casting it to
+ * Instead of passing to camlp5 a char Stream.t we pass a lexbuf, casting it to
* char Stream.t with Obj.magic where needed.
*)
let lexbuf = Obj.magic stream in
- Token.make_stream_and_flocation
+ Token.make_stream_and_location
(fun () ->
try
token lexbuf
let rec level2_meta_token =
lexer
- | xml_blank+ -> level2_meta_token lexbuf
+ | utf8_blank+ -> level2_meta_token lexbuf
| ident ->
let s = Ulexing.utf8_lexeme lexbuf in
begin
and level2_ast_token =
lexer
- | xml_blank+ -> ligatures_token level2_ast_token lexbuf
+ | let_rec -> return lexbuf ("LETREC","")
+ | let_corec -> return lexbuf ("LETCOREC","")
+ | we_proved -> return lexbuf ("WEPROVED","")
+ | we_have -> return lexbuf ("WEHAVE","")
+ | utf8_blank+ -> ligatures_token level2_ast_token lexbuf
| meta ->
let s = Ulexing.utf8_lexeme lexbuf in
return lexbuf ("META", String.sub s 1 (String.length s - 1))
and level1_pattern_token =
lexer
- | xml_blank+ -> ligatures_token level1_pattern_token lexbuf
+ | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
| ident ->
let s = Ulexing.utf8_lexeme lexbuf in