let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
-let handle_keywords lexbuf k name =
+let handle_keywords lexbuf k name =
let s = Ulexing.utf8_lexeme lexbuf in
+ prerr_endline (Printf.sprintf "handling \"%s\" as a keyword or %s?" s name);
if k s then
return lexbuf ("", s)
else
;;
let level2_ast_token loctable status =
- let rec aux desc href in_tag =
+ prerr_endline ("X keywords = " ^ (String.concat ", "
+ (StringSet.elements status)));
+ (* start = starting point of last open A tag (default -1 = no tag) *)
+ let rec aux desc href start =
lexer
| let_rec -> return lexbuf ("LETREC","")
| let_corec -> return lexbuf ("LETCOREC","")
| we_proved -> return lexbuf ("WEPROVED","")
| we_have -> return lexbuf ("WEHAVE","")
- | utf8_blank+ -> ligatures_token (aux desc href in_tag) lexbuf
+ | utf8_blank+ -> ligatures_token (aux desc href start) lexbuf
| meta ->
let s = Ulexing.utf8_lexeme lexbuf in
return lexbuf ("META", String.sub s 1 (String.length s - 1))
| implicit -> return lexbuf ("IMPLICIT", "")
| placeholder -> return lexbuf ("PLACEHOLDER", "")
- | hreftag -> aux_attr None None lexbuf
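+ (* on an opening <A ...> tag, remember its starting offset so that
+ aux_close_tag can later emit a location spanning the whole tagged token *)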
+ | hreftag ->
+ let start = Ulexing.lexeme_start lexbuf in
+ aux_attr None None start lexbuf
| hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
(* ignore other tags *)
| generictag
- | closetag -> ligatures_token (aux desc href in_tag) lexbuf
- | ident -> if in_tag then
- aux_close_tag desc href ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
- else handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ | closetag -> ligatures_token (aux desc href start) lexbuf
+ | ident ->
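+ (* inside an <A> tag the lexeme is still classified as keyword vs IDENT,
+ but the resulting token is routed through aux_close_tag *)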
+ if start <> ~-1 then
+ let idtok,_ =
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ in aux_close_tag desc href start idtok lexbuf
+(* ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf *)
+ else
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
| variable_ident ->
return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
- | pident -> if in_tag then
- aux_close_tag desc href ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
- else handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
+ | pident -> if start <> ~-1 then
+ aux_close_tag desc href start ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
+ else
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
| number -> let token = "NUMBER", Ulexing.utf8_lexeme lexbuf
in
- if in_tag then
- aux_close_tag desc href token lexbuf
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
else return lexbuf token
| tex_token -> let token = expand_macro lexbuf
in
- if in_tag then
- aux_close_tag desc href token lexbuf
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
else return lexbuf token
| nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
| uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
in
return lexbuf ("NOTE", comment) *)
- ligatures_token (aux desc href in_tag) lexbuf
+ ligatures_token (aux desc href start) lexbuf
| begincomment -> return lexbuf ("BEGINCOMMENT","")
| endcomment -> return lexbuf ("ENDCOMMENT","")
| eof -> return_eoi lexbuf
| _ -> let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf)
in
- if in_tag then
- aux_close_tag desc href token lexbuf
+ if start <> ~-1 then
+ aux_close_tag desc href start token lexbuf
else return lexbuf token
- and aux_attr desc href = lexer
- | utf8_blank+ -> ligatures_token (aux_attr desc href) lexbuf
+ and aux_attr desc href start = lexer
+ | utf8_blank+ -> ligatures_token (aux_attr desc href start) lexbuf
| href ->
aux_attr desc
(Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7)))
- lexbuf
+ start lexbuf
| hreftitle ->
aux_attr
(Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8)))
- href lexbuf
- | ymarkup -> aux desc href true lexbuf
+ href start lexbuf
+ | ymarkup -> aux desc href start lexbuf
| _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
(* and aux_in_tag desc href = lexer
update_table (loc_of_buf lexbuf) desc href !loctable;
return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
*)
- and aux_close_tag desc href token = lexer
- | hrefclose -> loctable := update_table (loc_of_buf lexbuf) desc href !loctable;
- return lexbuf token
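+ (* the token has already been recognised; here we only consume the closing
+ </A>, record the (start, end-of-</A>) span in loctable and return the
+ token with that extended location *)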
+ and aux_close_tag desc href start token = lexer
+ | hrefclose -> let _,b = Ulexing.loc lexbuf in
+ loctable := update_table (HExtlib.floc_of_loc (start,b)) desc href !loctable;
+ prerr_endline
+ (Printf.sprintf "adding loc (%d,%d) to table" start b);
+ return_with_loc token start b
| _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
- in aux None None false
+ in aux None None ~-1
let rec level1_pattern_token =
lexer
| closetag -> ligatures_token level1_pattern_token lexbuf
| ident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
| variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
- | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
+ | pident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "PIDENT"
| color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
| percentage ->
return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
type lexers = {
level1_pattern_lexer : (string * string) Token.glexer;
level2_ast_lexer : (string * string) Token.glexer;
- level2_meta_lexer : (string * string) Token.glexer
+ level2_meta_lexer : (string * string) Token.glexer;
}
-let mk_lexers loctable keywords =
- let initial_keywords =
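+(* lifted out of mk_lexers so the parser can refer to it as
+ CicNotationLexer.initial_keywords (the .mli presumably exports it) *)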
+let initial_keywords =
[ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
"with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
- in
+;;
+
+let mk_lexers loctable keywords =
+ prerr_endline ("mk_lexers keywords: " ^ (String.concat ", " keywords));
let status =
List.fold_right StringSet.add (initial_keywords @ keywords) StringSet.empty
in
{
level1_pattern_lexer = mk_lexer level1_pattern_token;
level2_ast_lexer = mk_lexer (level2_ast_token loctable status);
- level2_meta_lexer = mk_lexer level2_meta_token
+ level2_meta_lexer = mk_lexer level2_meta_token;
}
;;
module Ast = NotationPt
module Env = NotationEnv
-let prerr_endline _ = ()
+module StringSet = Set.Make(String)
+
+(* let prerr_endline _ = () *)
exception Parse_error of string
exception Level_not_found of int
term: 'b Grammar.Entry.e;
ident: 'e Grammar.Entry.e;
sym_attributes: (string option * string option) Grammar.Entry.e;
- sym_table: (string * Stdpp.location Grammar.Entry.e) list;
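+ (* keyed by a variant so that symbols and keywords can each own a
+ dedicated grammar entry *)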
+ sym_table: ([ `Sym of string | `Kwd of string ] * Stdpp.location Grammar.Entry.e) list;
let_defs: 'c Grammar.Entry.e;
let_codefs: 'c Grammar.Entry.e;
protected_binder_vars: 'd Grammar.Entry.e;
raise (Level_not_found precedence);
string_of_int precedence
-let add_symbol_to_grammar_explicit level2_ast_grammar
- sym_attributes sym_table s =
+let add_item_to_grammar_explicit level2_ast_grammar
+ sym_attributes sym_table item =
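+ (* choose both the concrete token to match and the name of the generated
+ non-terminal according to whether the item is a symbol or a keyword *)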
+ let stok, nonterm = match item with
+ | `Sym sy -> Gramext.Stoken ("SYMBOL",sy), "sym" ^ sy
+ | `Kwd i -> Gramext.Stoken("",i), "kwd" ^ i
+ in
try
- let _ = List.assoc s sym_table
+ let _ = List.assoc item sym_table
in sym_table
with Not_found ->
- let entry = Grammar.Entry.create level2_ast_grammar ("sym" ^ s) in
+(* let entry = Grammar.Entry.create level2_ast_grammar ("sym" ^ s) in *)
+ let entry = Grammar.Entry.create level2_ast_grammar nonterm in
Grammar.extend
[ Grammar.Entry.obj entry,
None,
[ None,
Some (*Gramext.NonA*) Gramext.NonA,
- [ [Gramext.Stoken ("SYMBOL",s)], (* concrete l1 syntax *)
- (Gramext.action (fun _ loc -> None, loc))
- ; [Gramext.Stoken ("ATAG","")
+ [ [stok], (* concrete l1 syntax *)
+ (Gramext.action (fun _ loc -> (* None, *) loc))
+(* ; [Gramext.Stoken ("ATAG","")
;Gramext.Snterm (Grammar.Entry.obj sym_attributes)
;Gramext.Stoken ("SYMBOL","\005")
- ;Gramext.Stoken ("SYMBOL",s)
+ ;stok
;Gramext.Stoken ("ATAGEND","")],
- (Gramext.action (fun _ uridesc _ _ _ loc -> (Some uridesc),loc))
+ (Gramext.action (fun _ uridesc _ _ _ loc -> (Some uridesc),loc)) *)
]]];
(* prerr_endline ("adding to grammar symbol " ^ s); *)
- (s,entry)::sym_table
+ (item,entry)::sym_table
-let add_symbol_to_grammar status s =
+let add_item_to_grammar status item =
let sym_attributes = status#notation_parser_db.grammars.sym_attributes in
let sym_table = status#notation_parser_db.grammars.sym_table in
let level2_ast_grammar =
status#notation_parser_db.grammars.level2_ast_grammar
in
let sym_table =
- add_symbol_to_grammar_explicit level2_ast_grammar sym_attributes sym_table s
+ add_item_to_grammar_explicit level2_ast_grammar sym_attributes sym_table
+ item
in
let grammars =
{ status#notation_parser_db.grammars with sym_table = sym_table }
let gram_symbol status s =
let sym_table = status#notation_parser_db.grammars.sym_table in
let entry =
- try List.assoc s sym_table
+ try List.assoc (`Sym s) sym_table
with Not_found ->
(let syms = List.map fst (status#notation_parser_db.grammars.sym_table) in
- let syms = List.map (fun x -> "\"" ^ x ^ "\"") syms in
- prerr_endline ("new symbol non-terminals: " ^ (String.concat ", " syms));
- prerr_endline ("unable to find symbol \"" ^ s ^ "\""); assert false)
+ let syms = List.map (fun x -> match x with `Sym x | `Kwd x -> "\"" ^ x ^ "\"") syms in
+ prerr_endline ("new symbol/keyword non-terminals: " ^ (String.concat ", " syms));
+ prerr_endline ("unable to find symbol \"" ^ s ^ "\"");
+ (* XXX: typing error
+ * Gramext.Stoken("SYMBOL", s) *)
+ assert false)
in
Gramext.Snterm (Grammar.Entry.obj entry)
-let gram_ident status =
- Gramext.Snterm (Grammar.Entry.obj
+let gram_ident status =
+ Gramext.Snterm (Grammar.Entry.obj
(status#notation_parser_db.grammars.ident : 'a Grammar.Entry.e))
(*Gramext.Stoken ("IDENT", s)*)
let gram_number s = Gramext.Stoken ("NUMBER", s)
-let gram_keyword s = Gramext.Stoken ("", s)
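+(* keywords registered in sym_table get their own non-terminal; anything
+ else (e.g. an initial keyword) falls back to a plain ("",s) token *)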
+let gram_keyword status s =
+ let sym_table = status#notation_parser_db.grammars.sym_table in
+ try Gramext.Snterm (Grammar.Entry.obj
+ (List.assoc (`Kwd s) sym_table))
+ with Not_found ->
+ (let syms = List.map fst (status#notation_parser_db.grammars.sym_table) in
+ let syms = List.map (fun x -> match x with `Sym x | `Kwd x -> "\"" ^ x ^ "\"") syms in
+ (* prerr_endline ("new symbol/keyword non-terminals: " ^ (String.concat ", " syms));
+ prerr_endline ("unable to find keyword \"" ^ s ^ "\""); *)
+ Gramext.Stoken("", s))
+
let gram_term status = function
| Ast.Self _ -> Gramext.Sself
| Ast.Level precedence ->
let gram_of_literal status =
function
| `Symbol (s,_) -> gram_symbol status s
- | `Keyword (s,_) -> gram_keyword s
+ | `Keyword (s,_) -> gram_keyword status s
| `Number (s,_) -> gram_number s
let make_action status action bindings =
[] -> Gramext.action (fun (loc: Ast.location) -> action vl loc)
| NoBinding :: tl ->
Gramext.action
- (fun (_,(loc: Ast.location)) ->
+ (fun ((*_,*)(loc: Ast.location)) ->
let uri,desc =
try
CicNotationLexer.LocalizeEnv.find loc
assert false
and aux_literal status =
function
- | `Symbol (s,_) -> add_symbol_to_grammar status s
- | `Keyword _ -> status
+ | `Symbol (s,_) -> add_item_to_grammar status (`Sym s)
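+ (* keywords from the initial set are skipped: gram_keyword falls back to
+ a plain ("",s) token for anything not in sym_table *)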
+ | `Keyword (s,_) ->
+ if not (List.mem s CicNotationLexer.initial_keywords)
+ then add_item_to_grammar status (`Kwd s)
+ else status
| `Number _ -> status
and aux_layout status = function
| Ast.Sub (p1, p2) -> aux (aux status p1) p2
| `Symbol (s,_) -> [NoBinding, gram_symbol status s]
| `Keyword (s,_) ->
(* assumption: s will be registered as a keyword with the lexer *)
- [NoBinding, gram_keyword s]
+ [NoBinding, gram_keyword status s]
| `Number (s,_) -> [NoBinding, gram_number s]
and aux_layout = function
| Ast.Sub (p1, p2) -> aux p1 @ [NoBinding, gram_symbol status "\\sub "] @ aux p2
grammars
;;
+
let initial_grammars loctable keywords =
let lexers = CicNotationLexer.mk_lexers loctable keywords in
let level1_pattern_grammar =
Grammar.Entry.create level2_ast_grammar "atag_attributes" in
let sym_table =
List.fold_left
- (add_symbol_to_grammar_explicit level2_ast_grammar sym_attributes)
- [] initial_symbols
+ (add_item_to_grammar_explicit level2_ast_grammar sym_attributes)
+ [] (List.map (fun s -> `Sym s) initial_symbols)
in
let let_defs = Grammar.Entry.create level2_ast_grammar "let_defs" in
let let_codefs = Grammar.Entry.create level2_ast_grammar "let_codefs" in
[ None,
Some (*Gramext.NonA*) Gramext.NonA,
[ p_atoms, (* concrete l1 syntax *)
- (make_action status
+ (make_action status
(fun (env: NotationEnv.t) (loc: Ast.location) ->
(action env loc))
p_bindings) ]]];