From 4a7dd5f16ad7a3ac176c0650580f736ae73f373d Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Thu, 19 May 2005 08:32:52 +0000 Subject: [PATCH] unified grammars and lexers in a single one --- helm/ocaml/cic_notation/cicNotationLexer.ml | 22 +-- helm/ocaml/cic_notation/cicNotationLexer.mli | 6 +- helm/ocaml/cic_notation/cicNotationParser.ml | 139 ++++++++----------- helm/ocaml/cic_notation/test_lexer.ml | 11 +- 4 files changed, 70 insertions(+), 108 deletions(-) diff --git a/helm/ocaml/cic_notation/cicNotationLexer.ml b/helm/ocaml/cic_notation/cicNotationLexer.ml index 9721ee805..dfbedef73 100644 --- a/helm/ocaml/cic_notation/cicNotationLexer.ml +++ b/helm/ocaml/cic_notation/cicNotationLexer.ml @@ -64,6 +64,8 @@ let return lexbuf token = let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in (token, (flocation_begin, flocation_end)) +let return_lexeme lexbuf name = return lexbuf (name, Ulexing.utf8_lexeme lexbuf) + let remove_quotes s = String.sub s 1 (String.length s - 2) let mk_lexer token = @@ -97,21 +99,8 @@ let expand_macro lexbuf = let keyword lexbuf = "KEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf) -(** {2 Level 1 lexer} *) - -let rec level1_token = lexer - | xml_blank+ -> level1_token lexbuf - | ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) - | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) - | keyword -> return lexbuf (keyword lexbuf) - | tex_token -> return lexbuf (expand_macro lexbuf) - | eof -> return lexbuf ("EOI", "") - | _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) - -(** {2 Level 2 lexer} *) - -let rec level2_token = lexer - | xml_blank+ -> level2_token lexbuf +let rec token = lexer + | xml_blank+ -> token lexbuf | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) | implicit -> return lexbuf ("IMPLICIT", Ulexing.utf8_lexeme lexbuf) | ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) @@ -124,6 +113,5 @@ let rec level2_token = lexer (* API implementation *) -let syntax_pattern_lexer = mk_lexer level1_token -let ast_pattern_lexer = mk_lexer level2_token +let notation_lexer = mk_lexer token diff --git a/helm/ocaml/cic_notation/cicNotationLexer.mli b/helm/ocaml/cic_notation/cicNotationLexer.mli index 82731d904..dd1561be2 100644 --- a/helm/ocaml/cic_notation/cicNotationLexer.mli +++ b/helm/ocaml/cic_notation/cicNotationLexer.mli @@ -28,9 +28,5 @@ * error message *) exception Error of int * int * string - (** lexer for concrete syntax patterns (notation level 1) *) -val syntax_pattern_lexer: (string * string) Token.glexer - - (** lexer for ast patterns (notation level 2) *) -val ast_pattern_lexer: (string * string) Token.glexer +val notation_lexer: (string * string) Token.glexer diff --git a/helm/ocaml/cic_notation/cicNotationParser.ml b/helm/ocaml/cic_notation/cicNotationParser.ml index 5e94d20a0..08bac32f5 100644 --- a/helm/ocaml/cic_notation/cicNotationParser.ml +++ b/helm/ocaml/cic_notation/cicNotationParser.ml @@ -27,30 +27,16 @@ open Printf exception Parse_error of Token.flocation * string -module Level1Lexer = +module NotationLexer = struct type te = string * string - let lexer = CicNotationLexer.syntax_pattern_lexer + let lexer = CicNotationLexer.notation_lexer end -module Level1Parser = Grammar.GMake (Level1Lexer) +module NotationGrammar = Grammar.GMake (NotationLexer) -module Level2Lexer = -struct - type te = string * string - let lexer = CicNotationLexer.ast_pattern_lexer -end -module Level2Parser = Grammar.GMake (Level2Lexer) - -module Level3Lexer = -struct - type te = string * string - let lexer = CicNotationLexer.ast_pattern_lexer -end -module Level3Parser = Grammar.GMake (Level3Lexer) - -let level1_pattern = Level1Parser.Entry.create "level1_pattern" -let level2_pattern = Level2Parser.Entry.create "level2_pattern" -let level3_interpretation = Level3Parser.Entry.create "level3_interpretation" +let level1_pattern = NotationGrammar.Entry.create "level1_pattern" +let level2_pattern = NotationGrammar.Entry.create "level2_pattern" +let level3_interpretation = NotationGrammar.Entry.create "level3_interpretation" let return_term loc term = () @@ -64,11 +50,11 @@ let int_of_string s = with Failure _ -> failwith (sprintf "Lexer failure: string_of_int \"%s\" failed" s) +GEXTEND NotationGrammar + GLOBAL: level1_pattern level2_pattern level3_interpretation; (* {{{ Grammar for concrete syntax patterns, notation level 1 *) -GEXTEND Level1Parser - GLOBAL: level1_pattern; - level1_pattern: [ [ p = pattern -> () ] ]; - pattern: [ [ p = LIST1 simple_pattern -> () ] ]; + level1_pattern: [ [ p = l1_pattern -> () ] ]; + l1_pattern: [ [ p = LIST1 l1_simple_pattern -> () ] ]; literal: [ [ s = SYMBOL -> () | k = KEYWORD -> () @@ -77,55 +63,53 @@ GEXTEND Level1Parser sep: [ [ SYMBOL "\\SEP"; sep = literal -> () ] ]; row_sep: [ [ SYMBOL "\\ROWSEP"; sep = literal -> () ] ]; field_sep: [ [ SYMBOL "\\FIELDSEP"; sep = literal -> () ] ]; - box_pattern: [ - [ SYMBOL "\\HBOX"; p = simple_pattern -> () - | SYMBOL "\\VBOX"; p = simple_pattern -> () + l1_box_pattern: [ + [ SYMBOL "\\HBOX"; p = l1_simple_pattern -> () + | SYMBOL "\\VBOX"; p = l1_simple_pattern -> () | SYMBOL "\\BREAK" -> () ] ]; - magic_pattern: [ - [ SYMBOL "\\LIST0"; p = simple_pattern; sep = OPT sep -> () - | SYMBOL "\\LIST1"; p = simple_pattern; sep = OPT sep -> () - | SYMBOL "\\OPT"; p = simple_pattern -> () + l1_magic_pattern: [ + [ SYMBOL "\\LIST0"; p = l1_simple_pattern; sep = OPT sep -> () + | SYMBOL "\\LIST1"; p = l1_simple_pattern; sep = OPT sep -> () + | SYMBOL "\\OPT"; p = l1_simple_pattern -> () ] ]; - pattern_variable: [ + l1_pattern_variable: [ [ id = IDENT -> () | SYMBOL "\\NUM"; id = IDENT -> () | SYMBOL "\\IDENT"; id = IDENT -> () ] ]; - simple_pattern: + l1_simple_pattern: [ "layout" LEFTA [ p1 = SELF; SYMBOL "\\SUB"; p2 = SELF -> () | p1 = SELF; SYMBOL "\\SUP"; p2 = SELF -> () | p1 = SELF; SYMBOL "\\BELOW"; p2 = SELF -> () | p1 = SELF; SYMBOL "\\ABOVE"; p2 = SELF -> () - | SYMBOL "["; p1 = pattern; SYMBOL "\\OVER"; p2 = pattern; SYMBOL "]" -> + | SYMBOL "["; p1 = l1_pattern; SYMBOL "\\OVER"; p2 = l1_pattern; + SYMBOL "]" -> () - | SYMBOL "["; p1 = pattern; SYMBOL "\\ATOP"; p2 = pattern; SYMBOL "]" -> + | SYMBOL "["; p1 = l1_pattern; SYMBOL "\\ATOP"; p2 = l1_pattern; + SYMBOL "]" -> () | SYMBOL "\\ARRAY"; p = SELF; fsep = OPT field_sep; rsep = OPT row_sep -> () | SYMBOL "\\FRAC"; p1 = SELF; p2 = SELF -> () | SYMBOL "\\SQRT"; p = SELF -> () - | SYMBOL "\\ROOT"; arg = pattern; SYMBOL "\\OF"; index = SELF -> () + | SYMBOL "\\ROOT"; arg = l1_pattern; SYMBOL "\\OF"; index = SELF -> () ] | "simple" NONA - [ m = magic_pattern -> () - | v = pattern_variable -> () - | b = box_pattern -> () + [ m = l1_magic_pattern -> () + | v = l1_pattern_variable -> () + | b = l1_box_pattern -> () | n = NUMBER -> () - | SYMBOL "["; p = pattern; SYMBOL "]" -> () + | SYMBOL "["; p = l1_pattern; SYMBOL "]" -> () ] ]; -END (* }}} *) - (* {{{ Grammar for ast patterns, notation level 2 *) -GEXTEND Level2Parser - GLOBAL: level2_pattern; - level2_pattern: [ [ p = pattern -> () ] ]; + level2_pattern: [ [ p = l2_pattern -> () ] ]; sort: [ [ SYMBOL "\\PROP" -> () | SYMBOL "\\SET" -> () @@ -141,7 +125,7 @@ GEXTEND Level2Parser ] ]; possibly_typed_name: [ - [ SYMBOL "("; i = IDENT; SYMBOL ":"; typ = pattern; SYMBOL ")" -> () + [ SYMBOL "("; i = IDENT; SYMBOL ":"; typ = l2_pattern; SYMBOL ")" -> () | i = IDENT -> () ] ]; @@ -160,10 +144,10 @@ GEXTEND Level2Parser ]; bound_names: [ [ vars = LIST1 IDENT SEP SYMBOL ","; - typ = OPT [ SYMBOL ":"; p = pattern -> () ] -> () + typ = OPT [ SYMBOL ":"; p = l2_pattern -> () ] -> () | LIST1 [ SYMBOL "("; vars = LIST1 IDENT SEP SYMBOL ","; - typ = OPT [ SYMBOL ":"; p = pattern -> () ]; SYMBOL ")" -> () + typ = OPT [ SYMBOL ":"; p = l2_pattern -> () ]; SYMBOL ")" -> () ] -> () ] @@ -177,42 +161,42 @@ GEXTEND Level2Parser [ defs = LIST1 [ name = IDENT; args = bound_names; index_name = OPT [ IDENT "on"; idx = IDENT -> () ]; - ty = OPT [ SYMBOL ":" ; p = pattern -> () ]; - SYMBOL <:unicode> (* ≝ *); body = pattern -> + ty = OPT [ SYMBOL ":" ; p = l2_pattern -> () ]; + SYMBOL <:unicode> (* ≝ *); body = l2_pattern -> () ] SEP IDENT "and" -> () ] ]; - pattern_variable: [ + l2_pattern_variable: [ [ SYMBOL "\\NUM"; id = IDENT -> () | SYMBOL "\\IDENT"; id = IDENT -> () | SYMBOL "\\FRESH"; id = IDENT -> () ] ]; - magic_pattern: [ + l2_magic_pattern: [ [ SYMBOL "\\FOLD"; n = OPT NUMBER; [ IDENT "left" | IDENT "right" ]; - LIST1 IDENT; SYMBOL "."; p1 = pattern; - OPT [ SYMBOL "\\LAMBDA"; LIST1 IDENT ]; p2 = pattern -> + LIST1 IDENT; SYMBOL "."; p1 = l2_pattern; + OPT [ SYMBOL "\\LAMBDA"; LIST1 IDENT ]; p2 = l2_pattern -> () - | SYMBOL "\\DEFAULT"; id = IDENT; p1 = pattern; p2 = pattern -> () + | SYMBOL "\\DEFAULT"; id = IDENT; p1 = l2_pattern; p2 = l2_pattern -> () ] ]; - pattern: + l2_pattern: [ "letin" NONA [ IDENT "let"; var = possibly_typed_name; SYMBOL <:unicode> (* ≝ *); - p1 = pattern; "in"; p2 = pattern -> + p1 = l2_pattern; "in"; p2 = l2_pattern -> () | IDENT "let"; k = induction_kind; defs = let_defs; IDENT "in"; - body = pattern -> + body = l2_pattern -> () ] | "binder" RIGHTA - [ b = binder; bound_names; SYMBOL "."; body = pattern -> () ] + [ b = binder; bound_names; SYMBOL "."; body = l2_pattern -> () ] | "extension" [ ] | "apply" LEFTA - [ p1 = pattern; p2 = pattern -> () ] + [ p1 = l2_pattern; p2 = l2_pattern -> () ] | "simple" NONA [ i = IDENT -> () | i = IDENT; s = explicit_subst -> () @@ -223,29 +207,26 @@ GEXTEND Level2Parser | s = sort -> () | s = SYMBOL -> () | u = URI -> () - | outtyp = OPT [ SYMBOL "["; typ = pattern; SYMBOL "]" ]; - IDENT "match"; t = pattern; + | outtyp = OPT [ SYMBOL "["; typ = l2_pattern; SYMBOL "]" ]; + IDENT "match"; t = l2_pattern; indty_ident = OPT [ SYMBOL ":"; id = IDENT ]; IDENT "with"; SYMBOL "["; patterns = LIST0 [ lhs = match_pattern; SYMBOL <:unicode> (* ⇒ *); - rhs = pattern -> + rhs = l2_pattern -> () ] SEP SYMBOL "|"; SYMBOL "]" -> () - | SYMBOL "("; p1 = pattern; SYMBOL ":"; p2 = pattern; SYMBOL ")" -> () - | SYMBOL "("; p = pattern; SYMBOL ")" -> () - | v = pattern_variable -> () - | m = magic_pattern -> () + | SYMBOL "("; p1 = l2_pattern; SYMBOL ":"; p2 = l2_pattern; SYMBOL ")" -> + () + | SYMBOL "("; p = l2_pattern; SYMBOL ")" -> () + | v = l2_pattern_variable -> () + | m = l2_magic_pattern -> () ] ]; -END (* }}} *) - (* {{{ Grammar for interpretation, notation level 3 *) -GEXTEND Level3Parser - GLOBAL: level3_interpretation; level3_interpretation: [ [ i = interpretation -> () ] ]; argument: [ [ i = IDENT -> () @@ -253,7 +234,7 @@ GEXTEND Level3Parser | SYMBOL <:unicode> (* η *); i = IDENT; SYMBOL "."; a = SELF -> () ] ]; - term: [ + l3_term: [ [ u = URI -> () | a = argument -> () | SYMBOL "("; terms = LIST1 SELF; SYMBOL ")" -> () @@ -261,12 +242,12 @@ GEXTEND Level3Parser ]; interpretation: [ [ IDENT "interpretation"; s = SYMBOL; args = LIST1 argument; IDENT "as"; - t = term -> + t = l3_term -> () ] ]; -END (* }}} *) +END let exc_located_wrapper f = try @@ -280,17 +261,19 @@ let exc_located_wrapper f = let parse_syntax_pattern stream = exc_located_wrapper (fun () -> - (Level1Parser.Entry.parse level1_pattern (Level1Parser.parsable stream))) + (NotationGrammar.Entry.parse level1_pattern + (NotationGrammar.parsable stream))) let parse_ast_pattern stream = exc_located_wrapper (fun () -> - (Level2Parser.Entry.parse level2_pattern (Level2Parser.parsable stream))) + (NotationGrammar.Entry.parse level2_pattern + (NotationGrammar.parsable stream))) let parse_interpretation stream = exc_located_wrapper (fun () -> - (Level3Parser.Entry.parse level3_interpretation - (Level3Parser.parsable stream))) + (NotationGrammar.Entry.parse level3_interpretation + (NotationGrammar.parsable stream))) (* vim:set encoding=utf8 foldmethod=marker: *) diff --git a/helm/ocaml/cic_notation/test_lexer.ml b/helm/ocaml/cic_notation/test_lexer.ml index b41f7f6b8..386c0f1b9 100644 --- a/helm/ocaml/cic_notation/test_lexer.ml +++ b/helm/ocaml/cic_notation/test_lexer.ml @@ -26,19 +26,14 @@ let _ = let level = ref ~-1 in let ic = ref stdin in - let arg_spec = [ "-level", Arg.Set_int level, "set the notation level" ] in - let usage = "test_lexer -level { 1 | 2 | 3 } [ file ]" in + let arg_spec = [] in + let usage = "test_lexer [ file ]" in let open_file fname = if !ic <> stdin then close_in !ic; ic := open_in fname in Arg.parse arg_spec open_file usage; - let lexer = - match !level with - | 1 -> CicNotationLexer.syntax_pattern_lexer - | 2 -> CicNotationLexer.ast_pattern_lexer - | _ -> Arg.usage arg_spec usage; exit 1 - in + let lexer = CicNotationLexer.notation_lexer in let token_stream = fst (lexer.Token.tok_func (Stream.of_channel !ic)) in Printf.printf "Lexing notation level %d\n" !level; flush stdout; let rec dump () = -- 2.39.2