+let level2_ast_token loctable status = (* Builds the level 2 AST tokenizer.
+   Parameters:
+   - loctable: reference updated by aux_close_tag each time a closing href
+     tag is lexed;
+   - status: StringSet used as the keyword predicate given to
+     handle_keywords.
+   Result: aux None None ~-1, i.e. the main lexer started with no
+   description, no href and no open tag. *)
+ prerr_endline ("X keywords = " ^ (String.concat ", "
+ (StringSet.elements status))); (* stderr trace of the elements of status; NOTE(review): looks like leftover debugging output - confirm *)
+ (* start = starting point of last open A tag (default -1 = no tag);
+    desc and href = the optional strings collected by aux_attr from the
+    attributes of that tag (None when nothing was collected) *)
+ let rec aux desc href start =
+ lexer
+ | let_rec -> return lexbuf ("LETREC","")
+ | let_corec -> return lexbuf ("LETCOREC","")
+ | we_proved -> return lexbuf ("WEPROVED","")
+ | we_have -> return lexbuf ("WEHAVE","")
+ | utf8_blank+ -> ligatures_token (aux desc href start) lexbuf (* no token is built here for blanks; lexing is handed to ligatures_token (defined elsewhere) with the same state *)
+ | meta -> (* the token value is the lexeme without its first character *)
+ let s = Ulexing.utf8_lexeme lexbuf in
+ return lexbuf ("META", String.sub s 1 (String.length s - 1))
+ | implicit -> return lexbuf ("IMPLICIT", "")
+ | placeholder -> return lexbuf ("PLACEHOLDER", "")
+ | hreftag -> (* record the start offset of this lexeme and lex the tag
+     attributes with aux_attr, desc and href being reset to None *)
+ let start = Ulexing.lexeme_start lexbuf in
+ aux_attr None None start lexbuf
+ | hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) (* a closing tag reaching aux itself, rather than aux_close_tag, is reported as an error token *)
+ (* ignore other tags *)
+ | generictag
+ | closetag -> ligatures_token (aux desc href start) lexbuf
+ | ident ->
+ if start <> ~-1 then (* a tag is open: keep only the first component of
+     the result of handle_keywords and hand it to aux_close_tag, which
+     requires the closing tag to come next *)
+ let idtok,_ =
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ in aux_close_tag desc href start idtok lexbuf
+(* ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf *)
+ else
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ | variable_ident -> (* NOTE(review): returned as IDENT without keyword lookup and without looking at start - confirm this is intended when a tag is open *)
+ return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+ | pident -> if start <> ~-1 then
+ aux_close_tag desc href start ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf (* NOTE(review): with a tag open the PIDENT token is built directly, bypassing handle_keywords - confirm *)
+ else
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
+ | number -> let token = "NUMBER", Ulexing.utf8_lexeme lexbuf
+ in
+ if start <> ~-1 then (* tag open: the closing tag must follow the token *)
+ aux_close_tag desc href start token lexbuf
+ else return lexbuf token
+ | tex_token -> let token = expand_macro lexbuf
+ in
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
+ else return lexbuf token
+ | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
+ | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
+ | qstring ->
+ return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf))
+ | csymbol ->
+ return lexbuf ("CSYMBOL", remove_left_quote (Ulexing.utf8_lexeme lexbuf))
+ | "${" -> read_unparsed_group "UNPARSED_META" lexbuf
+ | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf
+ | '(' -> return lexbuf ("LPAREN", "")
+ | ')' -> return lexbuf ("RPAREN", "")
+ | meta_ident ->
+ return lexbuf ("UNPARSED_META",
+ remove_left_quote (Ulexing.utf8_lexeme lexbuf))
+ | meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous")
+ | beginnote -> (* the result of comment_token is bound but never used;
+     no token is built for the note and lexing resumes with the same state *)
+ let _comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in
+ (* let comment =
+ Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
+ in
+ return lexbuf ("NOTE", comment) *)
+ ligatures_token (aux desc href start) lexbuf
+ | begincomment -> return lexbuf ("BEGINCOMMENT","")
+ | endcomment -> return lexbuf ("ENDCOMMENT","")
+ | eof -> return_eoi lexbuf
+ | _ -> let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf) (* fallback rule: any other lexeme becomes a SYMBOL token *)
+ in
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
+ else return lexbuf token
+
+ and aux_attr desc href start = lexer (* Lexes the attributes that follow
+     an hreftag lexeme. desc and href accumulate the values found so far;
+     start is the offset recorded by aux for the tag. *)
+ | utf8_blank+ -> ligatures_token (aux_attr desc href start) lexbuf
+ | href -> (* new href value: the sub-lexeme at offset 6 of length
+     (lexeme length - 7); presumably this drops the attribute name and
+     the delimiters around the value - confirm against the href regexp *)
+ aux_attr desc
+ (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7)))
+ start lexbuf
+ | hreftitle -> (* new desc value: the sub-lexeme at offset 7 of length
+     (lexeme length - 8); same caveat as for the href rule *)
+ aux_attr
+ (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8)))
+ href start lexbuf
+ | ymarkup -> aux desc href start lexbuf (* back to the main lexer, carrying the collected desc and href and the tag start *)
+ | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) (* anything else among the attributes is an error token *)
+
+(* and aux_in_tag desc href = lexer
+ | ident -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ | variable_ident ->
+ return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+ | pident -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
+ | number -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
+ | tex_token -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ return lexbuf (expand_macro lexbuf)
+ | _ -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
+ *)
+ and aux_close_tag desc href start token = lexer (* Called right after a
+     token has been lexed while a tag was open: the next lexeme has to be
+     hrefclose. token is the already built token to return. *)
+ | hrefclose -> let _,b = Ulexing.loc lexbuf in (* b = second component of the location of the closing tag lexeme *)
+ loctable := update_table (HExtlib.floc_of_loc (start,b)) desc href !loctable; (* record desc and href for the span (start,b) *)
+ prerr_endline
+ (Printf.sprintf "adding loc (%d,%d) to table" start b); (* stderr trace; NOTE(review): looks like leftover debugging output - confirm *)
+ return_with_loc token start b (* the token is returned with the span (start,b) instead of the location of the current lexeme *)
+ | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) (* anything but the closing tag is an error token *)
+ in aux None None ~-1 (* initial state: no desc, no href, no open tag *)
+
+let rec level1_pattern_token =