let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ]
+ (* must be in sync with "is_ligature_char" below *)
let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
let regexp ligature = ligature_char ligature_char+
+(** [is_ligature_char c] is [true] iff [c] occurs in the [chars] string
+    below, i.e. iff it is one of the characters accepted by the
+    [ligature_char] regular expression. *)
+let is_ligature_char =
+  (* must be in sync with "regexp ligature_char" above *)
+  let chars = "'`~!?@*()[]<>-+=|:;.,/\"" in
+  (* String.contains is the stdlib membership test; it replaces the former
+     String.index / Not_found dance without changing the result. *)
+  fun char -> String.contains chars char
+
let regexp ident_decoration = '\'' | '?' | '`'
let regexp ident_cont = ident_letter | xml_digit | '_'
let regexp ident = ident_letter ident_cont* ident_decoration*
let regexp qstring = '"' [^ '"']* '"'
let regexp begincomment = "(**" xml_blank
+let regexp beginnote = "(*"
let regexp endcomment = "*)"
-let regexp comment_char = [^'*'] | '*'[^')']
-let regexp note = "(*" ([^'*'] | "**") comment_char* "*)"
+(* let regexp comment_char = [^'*'] | '*'[^')']
+let regexp note = "|+" ([^'*'] | "**") comment_char* "+|" *)
let level1_layouts =
[ "sub"; "sup";
let level2_ast_keywords = Hashtbl.create 23
let _ =
List.iter (fun k -> Hashtbl.add level2_ast_keywords k ())
- [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "using"; "match";
- "with"; "in"; "and"; "to"; "as"; "on"; "names" ]
+ [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match";
+ "with"; "in"; "and"; "to"; "as"; "on"; "return" ]
let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
remove_left_quote (Ulexing.utf8_lexeme lexbuf))
| eof -> return_eoi lexbuf
+(** [comment_token acc depth lexbuf] consumes the rest of a (possibly nested)
+    comment from [lexbuf] and returns [acc] followed by every lexeme read, up
+    to and including the [endcomment] that closes the outermost comment.
+
+    @param acc text already consumed by the caller; it becomes the prefix of
+      the result
+    @param depth number of nested [beginnote] delimiters currently open on
+      top of the outermost one; the scan stops on an [endcomment] seen while
+      this is [0]
+
+    No rule matches end of input, so an unterminated comment is reported by
+    the lexer runtime -- NOTE(review): presumably as a lexing error, confirm
+    against the Ulexing documentation. *)
+let comment_token acc depth lexbuf =
+  (* Accumulate in a Buffer: the catch-all rule fires once per character, so
+     extending a string by concatenation would copy the whole comment text
+     on every step. *)
+  let buf = Buffer.create (String.length acc + 64) in
+  Buffer.add_string buf acc;
+  let rec scan depth =
+    lexer
+    | beginnote ->
+        let () = Buffer.add_string buf (Ulexing.utf8_lexeme lexbuf) in
+        scan (depth + 1) lexbuf
+    | endcomment ->
+        let () = Buffer.add_string buf (Ulexing.utf8_lexeme lexbuf) in
+        if depth = 0
+        then Buffer.contents buf
+        else scan (depth - 1) lexbuf
+    | _ ->
+        let () = Buffer.add_string buf (Ulexing.utf8_lexeme lexbuf) in
+        scan depth lexbuf
+  in
+  scan depth lexbuf
+
(** @param k continuation to be invoked when no ligature has been found *)
let rec ligatures_token k =
lexer
| ligature ->
let lexeme = Ulexing.utf8_lexeme lexbuf in
- (match Hashtbl.find_all ligatures lexeme with
+ (match List.rev (Hashtbl.find_all ligatures lexeme) with
| [] -> (* ligature not found, rollback and try default lexer *)
Ulexing.rollback lexbuf;
k lexbuf
- | ligs -> (* ligatures found, use the default one *)
- let default_lig = List.hd (List.rev ligs) in
+ | default_lig :: _ -> (* ligatures found, use the default one *)
return_symbol lexbuf default_lig)
| eof -> return_eoi lexbuf
| _ -> (* not a ligature, rollback and try default lexer *)
return lexbuf ("UNPARSED_META",
remove_left_quote (Ulexing.utf8_lexeme lexbuf))
| meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous")
- | note ->
- let comment =
+ | beginnote ->
+ let comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in
+(* let comment =
Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
in
- return lexbuf ("NOTE", comment)
+ return lexbuf ("NOTE", comment) *)
+ ligatures_token level2_ast_token lexbuf
| begincomment -> return lexbuf ("BEGINCOMMENT","")
| endcomment -> return lexbuf ("ENDCOMMENT","")
| eof -> return_eoi lexbuf
let level2_ast_lexer = mk_lexer level2_ast_token
let level2_meta_lexer = mk_lexer level2_meta_token
+(** [lookup_ligatures lexeme] returns the candidate expansions of [lexeme].
+
+    - If [lexeme] is empty, the result is [[]].
+    - If [lexeme] starts with a backslash, the rest of it is handed to
+      [Utf8Macro.expand]; the result is the one-element list holding that
+      expansion, or [[]] when the expansion fails.
+    - Otherwise the result is every binding of [lexeme] in the [ligatures]
+      table, in the reverse of the order produced by [Hashtbl.find_all]
+      (that is, oldest binding first). *)
+let lookup_ligatures lexeme =
+  (* Test emptiness explicitly rather than relying on the bounds check of
+     lexeme.[0], which is not performed when compiling with -unsafe. *)
+  if String.length lexeme = 0 then []
+  else if lexeme.[0] = '\\' then
+    let macro = String.sub lexeme 1 (String.length lexeme - 1) in
+    (* Only the macro expansion can fail here; the table lookup below needs
+       no handler. *)
+    (try [ Utf8Macro.expand macro ]
+     with Invalid_argument _ | Utf8Macro.Macro_not_found _ -> [])
+  else List.rev (Hashtbl.find_all ligatures lexeme)
+