From ae6757199708cc32166961debb52d48114c0eb74 Mon Sep 17 00:00:00 2001
From: Wilmer Ricciotti
Date: Fri, 23 Mar 2012 10:24:22 +0000
Subject: [PATCH] Matitaweb: Fixes a bug in the extensible parser which caused
 Matita to crash when parsing a notation using keywords (rather than
 symbols).

The semantic action associated with the notation expected the parser to
decorate the keyword token with its location; this was not happening, causing
a runtime type exception.
---
 .../content_pres/cicNotationLexer.ml  | 83 +++++++++++--------
 .../content_pres/cicNotationLexer.mli |  2 +
 .../content_pres/cicNotationParser.ml | 83 ++++++++++++-------
 3 files changed, 106 insertions(+), 62 deletions(-)

diff --git a/matitaB/components/content_pres/cicNotationLexer.ml b/matitaB/components/content_pres/cicNotationLexer.ml
index f85d2e776..3c84e0bb5 100644
--- a/matitaB/components/content_pres/cicNotationLexer.ml
+++ b/matitaB/components/content_pres/cicNotationLexer.ml
@@ -254,8 +254,9 @@ let read_unparsed_group token_name lexbuf =
   let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
     return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
 
-let handle_keywords lexbuf k name =
+let handle_keywords lexbuf k name = 
   let s = Ulexing.utf8_lexeme lexbuf in
+  prerr_endline (Printf.sprintf "handling \"%s\" as a keyword or %s?" s name);
   if k s then
     return lexbuf ("", s)
   else
@@ -341,40 +342,51 @@ let update_table loc desc href loctable =
 ;;
 
 let level2_ast_token loctable status =
-  let rec aux desc href in_tag =
+  prerr_endline ("X keywords = " ^ (String.concat ", "
+    (StringSet.elements status)));
+  (* start = starting point of last open A tag (default -1 = no tag) *)
+  let rec aux desc href start =
     lexer
     | let_rec -> return lexbuf ("LETREC","")
     | let_corec -> return lexbuf ("LETCOREC","")
     | we_proved -> return lexbuf ("WEPROVED","")
     | we_have -> return lexbuf ("WEHAVE","")
-    | utf8_blank+ -> ligatures_token (aux desc href in_tag) lexbuf
+    | utf8_blank+ -> ligatures_token (aux desc href start) lexbuf
     | meta ->
        let s = Ulexing.utf8_lexeme lexbuf in
        return lexbuf ("META", String.sub s 1 (String.length s - 1))
     | implicit -> return lexbuf ("IMPLICIT", "")
     | placeholder -> return lexbuf ("PLACEHOLDER", "")
-    | hreftag -> aux_attr None None lexbuf
+    | hreftag ->
+       let start = Ulexing.lexeme_start lexbuf in
+       aux_attr None None start lexbuf
     | hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
     (* ignore other tags *)
     | generictag
-    | closetag -> ligatures_token (aux desc href in_tag) lexbuf
-    | ident -> if in_tag then
-         aux_close_tag desc href ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
-       else handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+    | closetag -> ligatures_token (aux desc href start) lexbuf
+    | ident ->
+       if start <> ~-1 then
+         let idtok,_ =
+           handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+         in aux_close_tag desc href start idtok lexbuf
+(*         ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf *)
+       else
+         handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
     | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
-    | pident -> if in_tag then
-         aux_close_tag desc href ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
-       else handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
+    | pident -> if start <> ~-1 then
+        aux_close_tag desc href start ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf
+       else
+         handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
     | number ->
        let token = "NUMBER", Ulexing.utf8_lexeme lexbuf in
-       if in_tag then
-         aux_close_tag desc href token lexbuf
+       if start <> ~-1 then
+         aux_close_tag desc href start token lexbuf
        else return lexbuf token
     | tex_token ->
       let token = expand_macro lexbuf in
-      if in_tag then
-        aux_close_tag desc href token lexbuf
+      if start <> ~-1 then
+        aux_close_tag desc href start token lexbuf
      else return lexbuf token
     | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
     | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
@@ -396,27 +408,27 @@ let level2_ast_token loctable status =
           Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4) in
         return lexbuf ("NOTE", comment) *)
-        ligatures_token (aux desc href in_tag) lexbuf
+        ligatures_token (aux desc href start) lexbuf
     | begincomment -> return lexbuf ("BEGINCOMMENT","")
     | endcomment -> return lexbuf ("ENDCOMMENT","")
     | eof -> return_eoi lexbuf
     | _ ->
        let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf) in
-       if in_tag then
-         aux_close_tag desc href token lexbuf
+       if start <> ~-1 then
+         aux_close_tag desc href start token lexbuf
        else return lexbuf token
-  and aux_attr desc href = lexer
-    | utf8_blank+ -> ligatures_token (aux_attr desc href) lexbuf
+  and aux_attr desc href start = lexer
+    | utf8_blank+ -> ligatures_token (aux_attr desc href start) lexbuf
     | href -> aux_attr desc
       (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7)))
-      lexbuf
+      start lexbuf
    | hreftitle -> aux_attr
       (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8)))
-      href lexbuf
-    | ymarkup -> aux desc href true lexbuf
+      href start lexbuf
+    | ymarkup -> aux desc href start lexbuf
     | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
 (*  and aux_in_tag desc href = lexer
@@ -438,11 +450,14 @@ let level2_ast_token loctable status =
        update_table (loc_of_buf lexbuf) desc href !loctable;
        return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) *)
-  and aux_close_tag desc href token = lexer
-    | hrefclose -> loctable := update_table (loc_of_buf lexbuf) desc href !loctable;
-                   return lexbuf token
+  and aux_close_tag desc href start token = lexer
+    | hrefclose -> let _,b = Ulexing.loc lexbuf in
+        loctable := update_table (HExtlib.floc_of_loc (start,b)) desc href !loctable;
+        prerr_endline
+          (Printf.sprintf "adding loc (%d,%d) to table" start b);
+        return_with_loc token start b
     | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf)
-  in aux None None false
+  in aux None None ~-1
 
 
 let rec level1_pattern_token =
   lexer
@@ -454,7 +469,7 @@
   | closetag -> ligatures_token level1_pattern_token lexbuf
   | ident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
   | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
-  | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
+  | pident-> handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
   | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
   | percentage ->
      return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
@@ -476,20 +491,22 @@ let level2_ast_token loctable status =
 type lexers = {
   level1_pattern_lexer : (string * string) Token.glexer;
   level2_ast_lexer : (string * string) Token.glexer;
-  level2_meta_lexer : (string * string) Token.glexer
+  level2_meta_lexer : (string * string) Token.glexer;
 }
 
-let mk_lexers loctable keywords =
-  let initial_keywords =
+let initial_keywords =
   [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match"; "with"; "in"; "and"; "to";
     "as"; "on"; "return"; "done" ]
-  in
+;;
+
+let mk_lexers loctable keywords = 
+  prerr_endline ("mk_lexers keywords: " ^ (String.concat ", " keywords));
   let status =
     List.fold_right StringSet.add (initial_keywords @ keywords) StringSet.empty
   in {
     level1_pattern_lexer = mk_lexer level1_pattern_token;
     level2_ast_lexer = mk_lexer (level2_ast_token loctable status);
-    level2_meta_lexer = mk_lexer level2_meta_token
+    level2_meta_lexer = mk_lexer level2_meta_token;
   }
 ;;
 
diff --git a/matitaB/components/content_pres/cicNotationLexer.mli b/matitaB/components/content_pres/cicNotationLexer.mli
index 13f0150a8..ee96b552c 100644
--- a/matitaB/components/content_pres/cicNotationLexer.mli
+++ b/matitaB/components/content_pres/cicNotationLexer.mli
@@ -39,6 +39,8 @@ type lexers = {
   level2_meta_lexer : (string * string) Token.glexer
 }
 
+val initial_keywords : string list
+
 val mk_lexers : (string option * string option) LocalizeEnv.t ref ->
   string list -> lexers
 
diff --git a/matitaB/components/content_pres/cicNotationParser.ml b/matitaB/components/content_pres/cicNotationParser.ml
index 6736211d4..fcd209307 100644
--- a/matitaB/components/content_pres/cicNotationParser.ml
+++ b/matitaB/components/content_pres/cicNotationParser.ml
@@ -30,7 +30,9 @@ open Printf
 module Ast = NotationPt
 module Env = NotationEnv
 
-let prerr_endline _ = ()
+module StringSet = Set.Make(String)
+
+(* let prerr_endline _ = () *)
 
 exception Parse_error of string
 exception Level_not_found of int
@@ -45,7 +47,7 @@ type ('a,'b,'c,'d,'e) grammars = {
   term: 'b Grammar.Entry.e;
   ident: 'e Grammar.Entry.e;
   sym_attributes: (string option * string option) Grammar.Entry.e;
-  sym_table: (string * Stdpp.location Grammar.Entry.e) list;
+  sym_table: ([ `Sym of string | `Kwd of string ] * Stdpp.location Grammar.Entry.e) list;
   let_defs: 'c Grammar.Entry.e;
   let_codefs: 'c Grammar.Entry.e;
   protected_binder_vars: 'd Grammar.Entry.e;
@@ -90,39 +92,45 @@ let level_of precedence =
     raise (Level_not_found precedence);
   string_of_int precedence
 
-let add_symbol_to_grammar_explicit level2_ast_grammar
-  sym_attributes sym_table s =
+let add_item_to_grammar_explicit level2_ast_grammar
+  sym_attributes sym_table item =
+  let stok, nonterm = match item with
+  | `Sym sy -> Gramext.Stoken ("SYMBOL",sy), "sym" ^ sy
+  | `Kwd i -> Gramext.Stoken("",i), "kwd" ^ i
+  in
   try
-    let _ = List.assoc s sym_table
+    let _ = List.assoc item sym_table
     in sym_table
   with Not_found ->
-    let entry = Grammar.Entry.create level2_ast_grammar ("sym" ^ s) in
+(*  let entry = Grammar.Entry.create level2_ast_grammar ("sym" ^ s) in *)
+    let entry = Grammar.Entry.create level2_ast_grammar nonterm in
    Grammar.extend
     [ Grammar.Entry.obj entry,
      None,
      [ None,
       Some (*Gramext.NonA*) Gramext.NonA,
-       [ [Gramext.Stoken ("SYMBOL",s)], (* concrete l1 syntax *)
-          (Gramext.action (fun _ loc -> None, loc))
-       ; [Gramext.Stoken ("ATAG","")
+       [ [stok], (* concrete l1 syntax *)
+          (Gramext.action (fun _ loc -> (* None, *) loc))
+(*     ; [Gramext.Stoken ("ATAG","")
         ;Gramext.Snterm (Grammar.Entry.obj sym_attributes)
         ;Gramext.Stoken ("SYMBOL","\005")
-        ;Gramext.Stoken ("SYMBOL",s)
+        ;stok
         ;Gramext.Stoken ("ATAGEND","")],
-          (Gramext.action (fun _ uridesc _ _ _ loc -> (Some uridesc),loc))
+          (Gramext.action (fun _ uridesc _ _ _ loc -> (Some uridesc),loc)) *)
      ]]];
 (*  prerr_endline ("adding to grammar symbol " ^ s); *)
-  (s,entry)::sym_table
+  (item,entry)::sym_table
 
-let add_symbol_to_grammar status s =
+let add_item_to_grammar status item =
   let sym_attributes = status#notation_parser_db.grammars.sym_attributes in
   let sym_table = status#notation_parser_db.grammars.sym_table in
   let level2_ast_grammar =
    status#notation_parser_db.grammars.level2_ast_grammar
  in
  let sym_table =
-   add_symbol_to_grammar_explicit level2_ast_grammar sym_attributes sym_table s
+   add_item_to_grammar_explicit level2_ast_grammar sym_attributes sym_table
+     item
  in
  let grammars =
    { status#notation_parser_db.grammars with sym_table = sym_table }
@@ -134,21 +142,34 @@
 let gram_symbol status s =
   let sym_table = status#notation_parser_db.grammars.sym_table in
   let entry =
-    try List.assoc s sym_table
+    try List.assoc (`Sym s) sym_table
     with Not_found ->
      (let syms = List.map fst (status#notation_parser_db.grammars.sym_table) in
-      let syms = List.map (fun x -> "\"" ^ x ^ "\"") syms in
-      prerr_endline ("new symbol non-terminals: " ^ (String.concat ", " syms));
-      prerr_endline ("unable to find symbol \"" ^ s ^ "\""); assert false)
+      let syms = List.map (fun x -> match x with `Sym x | `Kwd x -> "\"" ^ x ^ "\"") syms in
+      prerr_endline ("new symbol/keyword non-terminals: " ^ (String.concat ", " syms));
+      prerr_endline ("unable to find symbol \"" ^ s ^ "\"");
+      (* XXX: typing error
+       * Gramext.Stoken("SYMBOL", s) *)
+      assert false)
   in
   Gramext.Snterm (Grammar.Entry.obj entry)
 
-let gram_ident status =
-  Gramext.Snterm (Grammar.Entry.obj
+let gram_ident status = 
+  Gramext.Snterm (Grammar.Entry.obj 
    (status#notation_parser_db.grammars.ident : 'a Grammar.Entry.e))
   (*Gramext.Stoken ("IDENT", s)*)
 let gram_number s = Gramext.Stoken ("NUMBER", s)
-let gram_keyword s = Gramext.Stoken ("", s)
+let gram_keyword status s =
+  let sym_table = status#notation_parser_db.grammars.sym_table in
+  try Gramext.Snterm (Grammar.Entry.obj
+    (List.assoc (`Kwd s) sym_table))
+  with Not_found ->
+   (let syms = List.map fst (status#notation_parser_db.grammars.sym_table) in
+    let syms = List.map (fun x -> match x with `Sym x | `Kwd x -> "\"" ^ x ^ "\"") syms in
+    (* prerr_endline ("new symbol/keyword non-terminals: " ^ (String.concat ", " syms));
+       prerr_endline ("unable to find keyword \"" ^ s ^ "\""); *)
+    Gramext.Stoken("", s))
+
 let gram_term status = function
   | Ast.Self _ -> Gramext.Sself
   | Ast.Level precedence ->
@@ -161,7 +182,7 @@ let gram_term status = function
 
 let gram_of_literal status = function
   | `Symbol (s,_) -> gram_symbol status s
-  | `Keyword (s,_) -> gram_keyword s
+  | `Keyword (s,_) -> gram_keyword status s
   | `Number (s,_) -> gram_number s
 
@@ -170,7 +191,7 @@ let make_action status action bindings =
       [] -> Gramext.action (fun (loc: Ast.location) -> action vl loc)
     | NoBinding :: tl ->
        Gramext.action
-         (fun (_,(loc: Ast.location)) ->
+         (fun ((*_,*)(loc: Ast.location)) ->
            let uri,desc =
             try
              CicNotationLexer.LocalizeEnv.find loc
@@ -229,8 +250,11 @@ let update_sym_grammar status pattern =
       assert false
 
   and aux_literal status = function
-    | `Symbol (s,_) -> add_symbol_to_grammar status s
-    | `Keyword _ -> status
+    | `Symbol (s,_) -> add_item_to_grammar status (`Sym s)
+    | `Keyword (s,_) ->
+        if not (List.mem s CicNotationLexer.initial_keywords)
+        then add_item_to_grammar status (`Kwd s)
+        else status
     | `Number _ -> status
   and aux_layout status = function
     | Ast.Sub (p1, p2) -> aux (aux status p1) p2
@@ -279,7 +303,7 @@ let extract_term_production status pattern =
     | `Symbol (s,_) -> [NoBinding, gram_symbol status s]
     | `Keyword (s,_) ->
         (* assumption: s will be registered as a keyword with the lexer *)
-        [NoBinding, gram_keyword s]
+        [NoBinding, gram_keyword status s]
     | `Number (s,_) -> [NoBinding, gram_number s]
   and aux_layout = function
     | Ast.Sub (p1, p2) -> aux p1 @ [NoBinding, gram_symbol status "\\sub "] @ aux p2
@@ -916,6 +940,7 @@
 END
   grammars
 ;;
+
 let initial_grammars loctable keywords =
   let lexers = CicNotationLexer.mk_lexers loctable keywords in
   let level1_pattern_grammar =
@@ -938,8 +963,8 @@ let initial_grammars loctable keywords =
     Grammar.Entry.create level2_ast_grammar "atag_attributes" in
   let sym_table =
     List.fold_left
-      (add_symbol_to_grammar_explicit level2_ast_grammar sym_attributes)
-      [] initial_symbols
+      (add_item_to_grammar_explicit level2_ast_grammar sym_attributes)
+      [] (List.map (fun s -> `Sym s) initial_symbols)
   in
   let let_defs = Grammar.Entry.create level2_ast_grammar "let_defs" in
   let let_codefs = Grammar.Entry.create level2_ast_grammar "let_codefs" in
@@ -1001,7 +1026,7 @@ let extend (status : #status) (CL1P (level1_pattern,precedence)) action =
     [ None,
       Some (*Gramext.NonA*) Gramext.NonA,
       [ p_atoms, (* concrete l1 syntax *)
-        (make_action status
+        (make_action status 
          (fun (env: NotationEnv.t) (loc: Ast.location) ->
            (action env loc)) p_bindings)
       ]]];
-- 
2.39.2
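
Reviewer note, not part of the patch: the core idea above is that the parser's
sym_table is no longer keyed by plain symbol strings but by a tagged item,
`Sym s for symbols and `Kwd s for keywords, so a keyword used in a notation
gets the same kind of location-producing grammar entry that symbols always
had. Below is a minimal, self-contained OCaml sketch of just that keying and
deduplication logic; the `entry` record and the `add_item` function are
hypothetical stand-ins for the Camlp5 `Stdpp.location Grammar.Entry.e` values
and `add_item_to_grammar_explicit` used by the real code, and the sample items
are made up.

(* Hedged sketch: `entry` is a placeholder for a real grammar entry; only the
   `Sym/`Kwd keying and the "add unless already present" logic mirror the
   patch. *)
type item = [ `Sym of string | `Kwd of string ]

type entry = { nonterm : string }  (* placeholder for a grammar entry *)

(* Mirror of the List.assoc check in add_item_to_grammar_explicit:
   reuse an existing entry, otherwise create one named "sym..." or "kwd...". *)
let add_item (table : (item * entry) list) (it : item) : (item * entry) list =
  if List.mem_assoc it table then table
  else
    let name = match it with
      | `Sym s -> "sym" ^ s
      | `Kwd k -> "kwd" ^ k
    in
    (it, { nonterm = name }) :: table

let () =
  (* "if" is requested twice but only added once. *)
  let table = List.fold_left add_item [] [ `Sym "+"; `Kwd "if"; `Kwd "if" ] in
  List.iter (fun (_, e) -> print_endline e.nonterm) table
  (* prints: kwdif, then sym+ *)

With the real grammar, gram_keyword then looks a keyword up under `Kwd and
only falls back to a plain Gramext.Stoken ("", s) for the built-in
initial_keywords, which aux_literal never adds to the table.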