let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
let regexp ligature = ligature_char ligature_char+
-let is_ligature_char =
- (* must be in sync with "regexp ligature_char" above *)
- let chars = "'`~!?@*()[]<>-+=|:;.,/\"" in
- (fun char ->
- (try
- ignore (String.index chars char);
- true
- with Not_found -> false))
-
let regexp we_proved = "we" utf8_blank+ "proved"
let regexp we_have = "we" utf8_blank+ "have"
let regexp let_rec = "let" utf8_blank+ "rec"
let regexp let_corec = "let" utf8_blank+ "corec"
+let regexp nlet_rec = "nlet" utf8_blank+ "rec"
+let regexp nlet_corec = "nlet" utf8_blank+ "corec"
let regexp ident_decoration = '\'' | '?' | '`'
let regexp ident_cont = ident_letter | xml_digit | '_'
-let regexp ident = ident_letter ident_cont* ident_decoration*
+let regexp ident_start = ident_letter
+let regexp ident = ident_letter ident_cont* ident_decoration*
+let regexp variable_ident = '_' '_' number
+let regexp pident = '_' ident
let regexp tex_token = '\\' ident
let regexp delim_begin = "\\["
let regexp delim_end = "\\]"
-let regexp qkeyword = "'" ident "'"
+let regexp qkeyword = "'" ( ident | pident ) "'"
let regexp implicit = '?'
let regexp placeholder = '%'
[ "sub"; "sup";
"below"; "above";
"over"; "atop"; "frac";
- "sqrt"; "root"
+ "sqrt"; "root"; "mstyle" ; "mpadded"; "maction"
+
]
let level1_keywords =
"break";
"list0"; "list1"; "sep";
"opt";
- "term"; "ident"; "number"; "mstyle" ; "mpadded"
+ "term"; "ident"; "number";
] @ level1_layouts
let level2_meta_keywords =
List.iter
(fun (ligature, symbol) -> Hashtbl.add ligatures ligature symbol)
[ ("->", <:unicode<to>>); ("=>", <:unicode<Rightarrow>>);
- ("<=", <:unicode<leq>>); (">=", <:unicode<geq>>);
- ("<>", <:unicode<neq>>); (":=", <:unicode<def>>);
- ("==", <:unicode<equiv>>);
+ (":=", <:unicode<def>>);
]
let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
('.' ident)+ (* ext *)
("#xpointer(" number ('/' number)+ ")")? (* xpointer *)
+let regexp nreference =
+ "cic:/" (* schema *)
+ uri_step ('/' uri_step)* (* path *)
+ '.'
+ ( "dec"
+ | "def" "(" number ")"
+ | "fix" "(" number "," number "," number ")"
+ | "cfx" "(" number ")"
+ | "ind" "(" number "," number "," number ")"
+ | "con" "(" number "," number "," number ")") (* ext + reference *)
+
let error lexbuf msg =
let begin_cnum, end_cnum = Ulexing.loc lexbuf in
raise (Error (begin_cnum, end_cnum, msg))
in
try
("SYMBOL", Utf8Macro.expand macro)
- with Utf8Macro.Macro_not_found _ -> "SYMBOL", Ulexing.utf8_lexeme lexbuf
+ with Utf8Macro.Macro_not_found _ ->
+(* FG: unexpanded TeX macros are terminated by a space for rendering *)
+ "SYMBOL", (Ulexing.utf8_lexeme lexbuf ^ " ")
let remove_quotes s = String.sub s 1 (String.length s - 2)
let remove_left_quote s = String.sub s 1 (String.length s - 1)
let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
+let handle_keywords lexbuf k name =
+ let s = Ulexing.utf8_lexeme lexbuf in
+ if k s then
+ return lexbuf ("", s)
+ else
+ return lexbuf (name, s)
+;;
+
let rec level2_meta_token =
lexer
| utf8_blank+ -> level2_meta_token lexbuf
| ident ->
- let s = Ulexing.utf8_lexeme lexbuf in
- begin
- if List.mem s level2_meta_keywords then
- return lexbuf ("", s)
- else
- return lexbuf ("IDENT", s)
- end
+ handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT"
+ | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+ | pident ->
+ handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "PIDENT"
| "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf
| ast_ident ->
return lexbuf ("UNPARSED_AST",
lexer
| let_rec -> return lexbuf ("LETREC","")
| let_corec -> return lexbuf ("LETCOREC","")
+ | nlet_rec -> return lexbuf ("NLETREC","")
+ | nlet_corec -> return lexbuf ("NLETCOREC","")
| we_proved -> return lexbuf ("WEPROVED","")
| we_have -> return lexbuf ("WEHAVE","")
| utf8_blank+ -> ligatures_token level2_ast_token lexbuf
return lexbuf ("META", String.sub s 1 (String.length s - 1))
| implicit -> return lexbuf ("IMPLICIT", "")
| placeholder -> return lexbuf ("PLACEHOLDER", "")
- | ident ->
- let lexeme = Ulexing.utf8_lexeme lexbuf in
- if Hashtbl.mem !level2_ast_keywords lexeme then
- return lexbuf ("", lexeme)
- else
- return lexbuf ("IDENT", lexeme)
+ | ident -> handle_keywords lexbuf (Hashtbl.mem !level2_ast_keywords) "IDENT"
+ | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+ | pident -> handle_keywords lexbuf (Hashtbl.mem !level2_ast_keywords) "PIDENT"
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
| tex_token -> return lexbuf (expand_macro lexbuf)
+ | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
| uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
| qstring ->
return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf))
lexer
| utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
- | ident ->
- let s = Ulexing.utf8_lexeme lexbuf in
- begin
- if List.mem s level1_keywords then
- return lexbuf ("", s)
- else
- return lexbuf ("IDENT", s)
- end
+ | ident ->handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
+ | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+ | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
| color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
| percentage ->
return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
let level2_ast_lexer () = !level2_ast_lexer_ref
let level2_meta_lexer () = !level2_meta_lexer_ref
-let lookup_ligatures lexeme =
- try
- if lexeme.[0] = '\\'
- then [ Utf8Macro.expand (String.sub lexeme 1 (String.length lexeme - 1)) ]
- else List.rev (Hashtbl.find_all ligatures lexeme)
- with Invalid_argument _ | Utf8Macro.Macro_not_found _ -> []
-;;
-
let history = ref [];;
let push () =