From 31d7f139796d6597915cd430baf37552dc26511c Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 17 May 2005 22:22:26 +0000 Subject: [PATCH] snapshot, notably: - solved factorization issues in level 1 - support for multiple grammars in cicNotationParser --- helm/ocaml/cic_notation/.depend | 4 - helm/ocaml/cic_notation/Makefile | 17 +-- helm/ocaml/cic_notation/cicNotationLexer.ml | 68 +++++------ helm/ocaml/cic_notation/cicNotationLexer.mli | 7 +- helm/ocaml/cic_notation/cicNotationParser.ml | 108 +++++++++++------- helm/ocaml/cic_notation/cicNotationParser.mli | 15 +-- helm/ocaml/cic_notation/test_lexer.ml | 43 ++++--- helm/ocaml/cic_notation/test_parser.ml | 54 +++++---- 8 files changed, 180 insertions(+), 136 deletions(-) diff --git a/helm/ocaml/cic_notation/.depend b/helm/ocaml/cic_notation/.depend index e0012b1af..e69de29bb 100644 --- a/helm/ocaml/cic_notation/.depend +++ b/helm/ocaml/cic_notation/.depend @@ -1,4 +0,0 @@ -cicNotationLexer.cmo: cicNotationLexer.cmi -cicNotationLexer.cmx: cicNotationLexer.cmi -cicNotationParser.cmo: cicNotationLexer.cmi cicNotationParser.cmi -cicNotationParser.cmx: cicNotationLexer.cmx cicNotationParser.cmi diff --git a/helm/ocaml/cic_notation/Makefile b/helm/ocaml/cic_notation/Makefile index 6b4d13c26..9fa417ef5 100644 --- a/helm/ocaml/cic_notation/Makefile +++ b/helm/ocaml/cic_notation/Makefile @@ -3,7 +3,6 @@ PACKAGE = cic_notation REQUIRES = \ helm-utf8_macros \ ulex -NOTATIONS = NULL = INTERFACE_FILES = \ cicNotationLexer.mli \ @@ -11,23 +10,19 @@ INTERFACE_FILES = \ $(NULL) IMPLEMENTATION_FILES = \ $(patsubst %.mli, %.ml, $(INTERFACE_FILES)) \ - $(patsubst %,%_notation.ml,$(NOTATIONS)) \ $(NULL) all: -cicNotationLexer.cmo: cicNotationLexer.ml +cicNotationLexer.cmo: cicNotationLexer.ml cicNotationLexer.cmi $(OCAMLC_P4) -c $< -cicNotationLexer.cmx: cicNotationLexer.ml +cicNotationLexer.cmx: cicNotationLexer.ml cicNotationLexer.cmi $(OCAMLOPT_P4) -c $< -cicNotationParser.cmo: cicNotationParser.ml +cicNotationParser.cmo: REQUIRES = helm-utf8_macros camlp4.gramlib +cicNotationParser.cmo: cicNotationParser.ml cicNotationParser.cmi cicNotationLexer.cmi $(OCAMLC_P4) -c $< -cicNotationParser.cmx: cicNotationParser.ml - $(OCAMLOPT_P4) -c $< - -%_notation.cmo: %_notation.ml - $(OCAMLC_P4) -c $< -%_notation.cmx: %_notation.ml +cicNotationParser.cmx: REQUIRES = helm-utf8_macros camlp4.gramlib +cicNotationParser.cmx: cicNotationParser.ml cicNotationParser.cmi cicNotationLexer.cmi $(OCAMLOPT_P4) -c $< LOCAL_LINKOPTS = -package helm-cic_notation -linkpkg diff --git a/helm/ocaml/cic_notation/cicNotationLexer.ml b/helm/ocaml/cic_notation/cicNotationLexer.ml index 6ff589384..306cf45b9 100644 --- a/helm/ocaml/cic_notation/cicNotationLexer.ml +++ b/helm/ocaml/cic_notation/cicNotationLexer.ml @@ -23,9 +23,9 @@ * http://helm.cs.unibo.it/ *) -exception Error of int * int * string +open Printf -exception Not_an_extended_ident +exception Error of int * int * string let regexp number = xml_digit+ @@ -41,40 +41,45 @@ let regexp implicit = '?' let regexp meta = implicit number let error lexbuf msg = - raise (Error (Ulexing.lexeme_start lexbuf, Ulexing.lexeme_end lexbuf, msg)) + let begin_cnum, end_cnum = Ulexing.loc lexbuf in + raise (Error (begin_cnum, end_cnum, msg)) let error_at_end lexbuf msg = - raise (Error (Ulexing.lexeme_end lexbuf, Ulexing.lexeme_end lexbuf, msg)) + let begin_cnum, end_cnum = Ulexing.loc lexbuf in + raise (Error (begin_cnum, end_cnum, msg)) let return lexbuf token = + let begin_cnum, end_cnum = Ulexing.loc lexbuf in + (* TODO handle line/column numbers *) let flocation_begin = - { Lexing.pos_fname = ""; Lexing.pos_lnum = -1; Lexing.pos_bol = -1; - Lexing.pos_cnum = Ulexing.lexeme_start lexbuf } - in - let flocation_end = - { flocation_begin with Lexing.pos_cnum = Ulexing.lexeme_end lexbuf } + { Lexing.pos_fname = ""; + Lexing.pos_lnum = -1; Lexing.pos_bol = -1; + Lexing.pos_cnum = begin_cnum } in + let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in (token, (flocation_begin, flocation_end)) let remove_quotes s = String.sub s 1 (String.length s - 2) -let tok_func token stream = - let lexbuf = Ulexing.from_utf8_stream stream in - Token.make_stream_and_flocation - (fun () -> - try - token lexbuf - with - | Ulexing.Error -> error_at_end lexbuf "Unexpected character" - | Ulexing.InvalidCodepoint i -> error_at_end lexbuf "Invalid code point") - -let mk_lexer token = { - Token.tok_func = token; - Token.tok_using = (fun _ -> ()); - Token.tok_removing = (fun _ -> ()); - Token.tok_match = Token.default_match; - Token.tok_text = Token.lexer_text; - Token.tok_comm = None; -} +let mk_lexer token = + let tok_func stream = + let lexbuf = Ulexing.from_utf8_stream stream in + Token.make_stream_and_flocation + (fun () -> + try + token lexbuf + with + | Ulexing.Error -> error_at_end lexbuf "Unexpected character" + | Ulexing.InvalidCodepoint p -> + error_at_end lexbuf (sprintf "Invalid code point: %d" p)) + in + { + Token.tok_func = tok_func; + Token.tok_using = (fun _ -> ()); + Token.tok_removing = (fun _ -> ()); + Token.tok_match = Token.default_match; + Token.tok_text = Token.lexer_text; + Token.tok_comm = None; + } let expand_macro lexbuf = let macro = @@ -97,9 +102,6 @@ let rec level1_token = lexer | eof -> return lexbuf ("EOI", "") | _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) -let level1_tok_func stream = tok_func level1_token stream -let level1_lexer = mk_lexer level1_tok_func - (** {2 Level 2 lexer} *) let rec level2_token = lexer @@ -113,6 +115,8 @@ let rec level2_token = lexer | eof -> return lexbuf ("EOI", "") | _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) -let level2_tok_func stream = tok_func level2_token stream -let level2_lexer = mk_lexer level2_tok_func +(* API implementation *) + +let syntax_pattern_lexer = mk_lexer level1_token +let ast_pattern_lexer = mk_lexer level2_token diff --git a/helm/ocaml/cic_notation/cicNotationLexer.mli b/helm/ocaml/cic_notation/cicNotationLexer.mli index 61632667e..82731d904 100644 --- a/helm/ocaml/cic_notation/cicNotationLexer.mli +++ b/helm/ocaml/cic_notation/cicNotationLexer.mli @@ -23,11 +23,14 @@ * http://helm.cs.unibo.it/ *) + (** begin of error offset (counted in unicode codepoint) + * end of error offset (counted as above) + * error message *) exception Error of int * int * string (** lexer for concrete syntax patterns (notation level 1) *) -val level1_lexer: (string * string) Token.glexer +val syntax_pattern_lexer: (string * string) Token.glexer (** lexer for ast patterns (notation level 2) *) -val level2_lexer: (string * string) Token.glexer +val ast_pattern_lexer: (string * string) Token.glexer diff --git a/helm/ocaml/cic_notation/cicNotationParser.ml b/helm/ocaml/cic_notation/cicNotationParser.ml index 2b5598c42..ce9049005 100644 --- a/helm/ocaml/cic_notation/cicNotationParser.ml +++ b/helm/ocaml/cic_notation/cicNotationParser.ml @@ -27,9 +27,22 @@ open Printf exception Parse_error of Token.flocation * string -let grammar = Grammar.gcreate CicNotationLexer.level1_lexer - -let level1 = Grammar.Entry.create grammar "level1" +module Level1Lexer = +struct + type te = string * string + let lexer = CicNotationLexer.syntax_pattern_lexer +end +module Level1Parser = Grammar.GMake (Level1Lexer) + +module Level2Lexer = +struct + type te = string * string + let lexer = CicNotationLexer.ast_pattern_lexer +end +module Level2Parser = Grammar.GMake (Level2Lexer) + +let level1_pattern = Level1Parser.Entry.create "level1_pattern" +let level2_pattern = Level2Parser.Entry.create "level2_pattern" let return_term loc term = () @@ -43,10 +56,10 @@ let int_of_string s = with Failure _ -> failwith (sprintf "Lexer failure: string_of_int \"%s\" failed" s) -EXTEND - GLOBAL: level1; - - level1: [ [ p = pattern -> () ] ]; +(* {{{ Grammar for concrete syntax patterns, notation level 1 *) +GEXTEND Level1Parser + GLOBAL: level1_pattern; + level1_pattern: [ [ p = pattern -> () ] ]; pattern: [ [ p = LIST1 simple_pattern -> () ] ]; @@ -67,38 +80,46 @@ EXTEND ] ]; - layout_schemata: [ - [ SYMBOL "\\ARRAY"; p = simple_pattern; fsep = OPT field_sep; - rsep = OPT row_sep -> - () - | SYMBOL "\\FRAC"; p1 = simple_pattern; p2 = simple_pattern -> () - | SYMBOL "\\SQRT"; p = simple_pattern -> () - | SYMBOL "\\ROOT"; p1 = simple_pattern; SYMBOL "\\OF"; - p2 = simple_pattern -> - () - (* TODO XXX many issues here: - * - "^^" is lexed as two "^" symbols - * - "a_b" is lexed as IDENT "a_b" *) - | p1 = simple_pattern; SYMBOL "^"; p2 = simple_pattern -> () - | p1 = simple_pattern; SYMBOL "^"; SYMBOL "^"; p2 = simple_pattern -> () - | p1 = simple_pattern; SYMBOL "_"; p2 = simple_pattern -> () - | p1 = simple_pattern; SYMBOL "_"; SYMBOL "_"; p2 = simple_pattern -> () - ] - ]; + simple_pattern: + [ "infix" LEFTA + (* TODO XXX many issues here: + * - "^^" is lexed as two "^" symbols + * - "a_b" is lexed as IDENT "a_b" *) + [ p1 = SELF; SYMBOL "^"; p2 = SELF -> () + | p1 = SELF; SYMBOL "^"; SYMBOL "^"; p2 = SELF -> () + | p1 = SELF; SYMBOL "_"; p2 = SELF -> () + | p1 = SELF; SYMBOL "_"; SYMBOL "_"; p2 = SELF -> () + ] + | "simple" NONA + [ SYMBOL "\\LIST0"; p = SELF; sep = OPT sep -> () + | SYMBOL "\\LIST1"; p = SELF; sep = OPT sep -> () + | b = box_token -> () + | id = IDENT -> () + | SYMBOL "\\NUM"; id = IDENT -> () + | SYMBOL "\\IDENT"; id = IDENT -> () + | SYMBOL "\\OPT"; p = SELF -> () + | SYMBOL "\\ARRAY"; p = SELF; fsep = OPT field_sep; rsep = OPT row_sep -> + () + | SYMBOL "\\FRAC"; p1 = SELF; p2 = SELF -> () + | SYMBOL "\\SQRT"; p = SELF -> () + | SYMBOL "\\ROOT"; p1 = SELF; SYMBOL "\\OF"; p2 = SELF -> + () + | SYMBOL "["; p = pattern; SYMBOL "]" -> () + ] + ]; +END +(* }}} *) - simple_pattern: [ - [ SYMBOL "\\LIST0"; p = simple_pattern; sep = OPT sep -> () - | SYMBOL "\\LIST1"; p = simple_pattern; sep = OPT sep -> () - | b = box_token -> () - | id = IDENT -> () - | SYMBOL "\\NUM"; id = IDENT -> () - | SYMBOL "\\IDENT"; id = IDENT -> () - | SYMBOL "\\OPT"; p = simple_pattern -> () - | l = layout_schemata -> () - | SYMBOL "["; p = pattern; SYMBOL "]" -> () - ] - ]; +(* {{{ Grammar for ast patterns, notation level 2 *) +GEXTEND Level2Parser + GLOBAL: level2_pattern; + level2_pattern: [ [ p = pattern -> () ] ]; + + pattern: [ [ a = ast -> () ] ]; + + ast: [ [ SYMBOL "\\FOO" -> () ] ]; END +(* }}} *) let exc_located_wrapper f = try @@ -109,7 +130,14 @@ let exc_located_wrapper f = | Stdpp.Exc_located (floc, exn) -> raise (Parse_error (floc, (Printexc.to_string exn))) -let parse_level1_pattern stream = - exc_located_wrapper (fun () -> (Grammar.Entry.parse level1 stream)) +let parse_syntax_pattern stream = + exc_located_wrapper + (fun () -> + (Level1Parser.Entry.parse level1_pattern (Level1Parser.parsable stream))) + +let parse_ast_pattern stream = + exc_located_wrapper + (fun () -> + (Level2Parser.Entry.parse level2_pattern (Level2Parser.parsable stream))) -(* vim:set encoding=utf8: *) +(* vim:set encoding=utf8 foldmethod=marker: *) diff --git a/helm/ocaml/cic_notation/cicNotationParser.mli b/helm/ocaml/cic_notation/cicNotationParser.mli index 84ce24b14..fab4d4102 100644 --- a/helm/ocaml/cic_notation/cicNotationParser.mli +++ b/helm/ocaml/cic_notation/cicNotationParser.mli @@ -27,16 +27,9 @@ exception Parse_error of Token.flocation * string (** {2 Parsing functions} *) -val parse_level1_pattern: char Stream.t -> unit -(*val parse_level2_pattern: char Stream.t -> unit*) + (** concrete syntax pattern: notation level 1 *) +val parse_syntax_pattern: char Stream.t -> unit -(** {2 Grammar extensions} *) - -(*val term: CicAst.term Grammar.Entry.e |+* recursive rule +|*) -(*val term0: CicAst.term Grammar.Entry.e |+* top level rule +|*) - -(*val return_term: CicAst.location -> CicAst.term -> CicAst.term*) - - (** raise a parse error *) -(*val fail: CicAst.location -> string -> 'a*) + (** AST pattern: notation level 2 *) +val parse_ast_pattern: char Stream.t -> unit diff --git a/helm/ocaml/cic_notation/test_lexer.ml b/helm/ocaml/cic_notation/test_lexer.ml index 7672d3070..b41f7f6b8 100644 --- a/helm/ocaml/cic_notation/test_lexer.ml +++ b/helm/ocaml/cic_notation/test_lexer.ml @@ -23,20 +23,31 @@ * http://helm.cs.unibo.it/ *) -let ic = +let _ = + let level = ref ~-1 in + let ic = ref stdin in + let arg_spec = [ "-level", Arg.Set_int level, "set the notation level" ] in + let usage = "test_lexer -level { 1 | 2 | 3 } [ file ]" in + let open_file fname = + if !ic <> stdin then close_in !ic; + ic := open_in fname + in + Arg.parse arg_spec open_file usage; + let lexer = + match !level with + | 1 -> CicNotationLexer.syntax_pattern_lexer + | 2 -> CicNotationLexer.ast_pattern_lexer + | _ -> Arg.usage arg_spec usage; exit 1 + in + let token_stream = fst (lexer.Token.tok_func (Stream.of_channel !ic)) in + Printf.printf "Lexing notation level %d\n" !level; flush stdout; + let rec dump () = + let (a,b) = Stream.next token_stream in + if a = "EOI" then raise Stream.Failure; + print_endline (Printf.sprintf "%s '%s'" a b); + dump () + in try - open_in Sys.argv.(1) - with Invalid_argument _ -> stdin -in -let token_stream = - fst (CicNotationLexer.level1_lexer.Token.tok_func (Stream.of_channel ic)) -in -let rec dump () = - let (a,b) = Stream.next token_stream in - if a = "EOI" then raise Stream.Failure; - print_endline (Printf.sprintf "%s '%s'" a b); - dump () -in -try - dump () -with Stream.Failure -> () + dump () + with Stream.Failure -> () + diff --git a/helm/ocaml/cic_notation/test_parser.ml b/helm/ocaml/cic_notation/test_parser.ml index 21315c77f..61fd56d73 100644 --- a/helm/ocaml/cic_notation/test_parser.ml +++ b/helm/ocaml/cic_notation/test_parser.ml @@ -25,25 +25,39 @@ open Printf +(* cut and past from CicAst.loc_of_floc *) +let loc_of_floc = function + | { Lexing.pos_cnum = loc_begin }, { Lexing.pos_cnum = loc_end } -> + (loc_begin, loc_end) + let _ = - let ic = stdin in - try - while true do - let line = input_line ic in - let istream = Stream.of_string line in - try - let _ = CicNotationParser.parse_level1_pattern istream in - print_endline "ok" - with - | CicNotationParser.Parse_error (floc, msg) -> - eprintf "parse error: %s\n" msg; flush stderr -(* let (x, y) = CicAst.loc_of_floc floc in*) -(* let before = String.sub line 0 x in*) -(* let error = String.sub line x (y - x) in*) -(* let after = String.sub line y (String.length line - y) in*) -(* eprintf "%s%s%s\n" before error after;*) -(* prerr_endline (sprintf "at character %d-%d: %s" x y msg)*) - done - with End_of_file -> - close_in ic + let level = ref ~-1 in + let arg_spec = [ "-level", Arg.Set_int level, "set the notation level" ] in + let usage = "test_parser -level { 1 | 2 | 3 }" in + Arg.parse arg_spec (fun _ -> raise (Arg.Bad usage)) usage; + let parse_fun = + match !level with + | 1 -> CicNotationParser.parse_syntax_pattern + | 2 -> CicNotationParser.parse_ast_pattern + | _ -> Arg.usage arg_spec usage; exit 1 + in + let ic = stdin in + try + printf "Parsing notation level %d\n" !level; flush stdout; + while true do + let line = input_line ic in + let istream = Stream.of_string line in + try + let _ = parse_fun istream in + print_endline "ok" + with CicNotationParser.Parse_error (floc, msg) -> + let (x, y) = loc_of_floc floc in + let before = String.sub line 0 x in + let error = String.sub line x (y - x) in + let after = String.sub line y (String.length line - y) in + eprintf "%s%s%s\n" before error after; + prerr_endline (sprintf "at character %d-%d: %s" x y msg) + done + with End_of_file -> + close_in ic -- 2.39.2