"break";
"list0"; "list1"; "sep";
"opt";
- "term"; "ident"; "number";
+ "term"; "ident"; "number"; "ref"
] @ level1_layouts
let level2_meta_keywords =
- [ "if"; "then"; "elCicNotationParser.se";
+ [ "if"; "then"; "else";
"fold"; "left"; "right"; "rec";
"fail";
"default";
let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
(* [handle_keywords lexbuf k name]: classify the lexeme just matched in
   [lexbuf].  If the predicate [k] accepts it, it is returned as a keyword
   token with an empty token name ("", s); the non-keyword branch after
   [else] is elided by this hunk — presumably it returns ([name], s), but
   confirm against the full file.
   NOTE(review): the added prerr_endline writes a debug trace to stderr on
   every identifier lexed — consider removing before release. *)
-let handle_keywords lexbuf k name =
+let handle_keywords lexbuf k name =
let s = Ulexing.utf8_lexeme lexbuf in
+ prerr_endline (Printf.sprintf "handling \"%s\" as a keyword or %s?" s name);
if k s then
return lexbuf ("", s)
else
;;
(* Tokenizer for level-2 AST notation (a ulex/camlp4 [lexer] definition).
   [loctable] is a ref to a location table, updated when an <A ...>...</A>
   hyperlink region closes; [status] is the StringSet of currently active
   keywords used by [handle_keywords].
   This hunk replaces the boolean [in_tag] flag with [start]: the byte
   offset where the last open A tag began (-1 = no open tag), so that
   closing the tag can record the full (start, end) span in [loctable].
   NOTE(review): interior lines are elided at hunk boundaries (e.g. the
   comment-rule opener before the utf8_sub_lexeme line below), so the rule
   set shown here is not complete.
   NOTE(review): the added prerr_endline calls are stderr debug traces —
   consider removing before release. *)
let level2_ast_token loctable status =
- let rec aux desc href in_tag =
+ prerr_endline ("X keywords = " ^ (String.concat ", "
+ (StringSet.elements status)));
+ (* start = starting point of last open A tag (default -1 = no tag) *)
+ let rec aux desc href start =
lexer
| let_rec -> return lexbuf ("LETREC","")
| let_corec -> return lexbuf ("LETCOREC","")
| we_proved -> return lexbuf ("WEPROVED","")
| we_have -> return lexbuf ("WEHAVE","")
- | utf8_blank+ -> ligatures_token (aux desc href in_tag) lexbuf
+ | utf8_blank+ -> ligatures_token (aux desc href start) lexbuf
| meta ->
let s = Ulexing.utf8_lexeme lexbuf in
return lexbuf ("META", String.sub s 1 (String.length s - 1))
| implicit -> return lexbuf ("IMPLICIT", "")
| placeholder -> return lexbuf ("PLACEHOLDER", "")
(* Opening an A tag: remember its starting offset and go parse its
   href/title attributes. *)
- | hreftag -> aux_attr None None lexbuf
+ | hreftag ->
+ let start = Ulexing.lexeme_start lexbuf in
+ aux_attr None None start lexbuf
| hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
(* ignore other tags *)
| generictag
- | closetag -> ligatures_token (aux desc href in_tag) lexbuf
- | ident -> if in_tag then
- aux_close_tag desc href ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
- else handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ | closetag -> ligatures_token (aux desc href start) lexbuf
(* Inside an open A tag, the identifier is still keyword-classified
   (via handle_keywords) before the closing </A> is consumed. *)
+ | ident ->
+ if start <> ~-1 then
+ let idtok,_ =
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ in aux_close_tag desc href start idtok lexbuf
+(* ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf *)
+ else
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
| variable_ident ->
return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
(* NOTE(review): unlike the ident rule above, pident inside a tag is NOT
   passed through handle_keywords — confirm this asymmetry is intended. *)
- | pident -> if in_tag then
- aux_close_tag desc href ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
- else handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
+ | pident -> if start <> ~-1 then
+ aux_close_tag desc href start ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
+ else
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
| number -> let token = "NUMBER", Ulexing.utf8_lexeme lexbuf
in
- if in_tag then
- aux_close_tag desc href token lexbuf
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
else return lexbuf token
| tex_token -> let token = expand_macro lexbuf
in
- if in_tag then
- aux_close_tag desc href token lexbuf
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
else return lexbuf token
| nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
| uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
(* NOTE(review): the rule head for the lines below (a commented-out NOTE
   rule, judging by the dangling "*)") is elided by this hunk. *)
Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
in
return lexbuf ("NOTE", comment) *)
- ligatures_token (aux desc href in_tag) lexbuf
+ ligatures_token (aux desc href start) lexbuf
| begincomment -> return lexbuf ("BEGINCOMMENT","")
| endcomment -> return lexbuf ("ENDCOMMENT","")
| eof -> return_eoi lexbuf
| _ -> let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf)
in
- if in_tag then
- aux_close_tag desc href token lexbuf
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
else return lexbuf token
(* aux_attr: parse the attributes of an open A tag, accumulating the
   optional title [desc] and [href]; [start] is threaded through so the
   tag's opening offset survives until aux_close_tag. *)
- and aux_attr desc href = lexer
- | utf8_blank+ -> ligatures_token (aux_attr desc href) lexbuf
+ and aux_attr desc href start = lexer
+ | utf8_blank+ -> ligatures_token (aux_attr desc href start) lexbuf
| href ->
aux_attr desc
(Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7)))
- lexbuf
+ start lexbuf
| hreftitle ->
aux_attr
(Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8)))
- href lexbuf
- | ymarkup -> aux desc href true lexbuf
+ href start lexbuf
+ | ymarkup -> aux desc href start lexbuf
| _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
(* and aux_in_tag desc href = lexer
update_table (loc_of_buf lexbuf) desc href !loctable;
return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
*)
(* aux_close_tag: expect </A>; on success record the (start, end-of-close)
   span with its desc/href in loctable and emit [token] with that span. *)
- and aux_close_tag desc href token = lexer
- | hrefclose -> loctable := update_table (loc_of_buf lexbuf) desc href !loctable;
- return lexbuf token
+ and aux_close_tag desc href start token = lexer
+ | hrefclose -> let _,b = Ulexing.loc lexbuf in
+ loctable := update_table (HExtlib.floc_of_loc (start,b)) desc href !loctable;
+ prerr_endline
+ (Printf.sprintf "adding loc (%d,%d) to table" start b);
+ return_with_loc token start b
| _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
(* -1 = no A tag currently open (see the [start] convention above). *)
- in aux None None false
+ in aux None None ~-1
(* Tokenizer for level-1 notation patterns (a ulex/camlp4 [lexer]).
   This hunk: (1) comments out the hreftag/hrefclose/generictag/closetag
   rules, (2) moves the [qkeyword] rule up ahead of [ident], (3) adds a
   new [csymbol] rule, and (4) re-spaces the [pident] rule.
   NOTE(review): the prerr_endline in the csymbol rule is a stderr debug
   trace — consider removing before release. *)
let rec level1_pattern_token =
lexer
| utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
(* A-tag handling disabled at level 1 by this hunk: *)
- | hreftag -> return lexbuf ("ATAG", "")
+(* | hreftag -> return lexbuf ("ATAG", "")
| hrefclose -> return lexbuf ("ATAGEND","")
| generictag
- | closetag -> ligatures_token level1_pattern_token lexbuf
+ | closetag -> ligatures_token level1_pattern_token lexbuf *)
+ | qkeyword ->
+ return lexbuf ("QKEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf))
+ | csymbol -> prerr_endline ("lexing csymbol " ^ (Ulexing.utf8_lexeme lexbuf));
+ return lexbuf ("CSYMBOL", remove_left_quote (Ulexing.utf8_lexeme lexbuf))
| ident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
| variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
- | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
+ | pident-> handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
| color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
| percentage ->
return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
| floatwithunit ->
return lexbuf ("FLOATWITHUNIT", Ulexing.utf8_lexeme lexbuf)
| tex_token -> return lexbuf (expand_macro lexbuf)
(* old position of the qkeyword rule (moved above ident by this hunk): *)
- | qkeyword ->
- return lexbuf ("QKEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf))
| '(' -> return lexbuf ("LPAREN", "")
| ')' -> return lexbuf ("RPAREN", "")
| eof -> return_eoi lexbuf
(* Record bundling the three notation lexers handed out by [mk_lexers].
   This hunk only normalizes the last field to carry a trailing ';'. *)
type lexers = {
level1_pattern_lexer : (string * string) Token.glexer;
level2_ast_lexer : (string * string) Token.glexer;
- level2_meta_lexer : (string * string) Token.glexer
+ level2_meta_lexer : (string * string) Token.glexer;
}
(* This hunk hoists [initial_keywords] out of [mk_lexers] to a top-level
   binding (so it is built once and is visible to other code), leaving
   [mk_lexers] to fold the initial plus caller-supplied [keywords] into
   the StringSet [status] consumed by the level-2 AST lexer.
   NOTE(review): the added prerr_endline is a stderr debug trace on every
   mk_lexers call — consider removing before release. *)
-let mk_lexers loctable keywords =
- let initial_keywords =
+let initial_keywords =
[ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
"with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
- in
+;;
+
+let mk_lexers loctable keywords =
+ prerr_endline ("mk_lexers keywords: " ^ (String.concat ", " keywords));
let status =
List.fold_right StringSet.add (initial_keywords @ keywords) StringSet.empty
in
{
level1_pattern_lexer = mk_lexer level1_pattern_token;
level2_ast_lexer = mk_lexer (level2_ast_token loctable status);
- level2_meta_lexer = mk_lexer level2_meta_token
+ level2_meta_lexer = mk_lexer level2_meta_token;
}
;;