From 5a40de00f69847c247100b0bde4569cb4003e316 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Wed, 18 May 2005 10:53:29 +0000 Subject: [PATCH] snapshot, notably: - fixed some lexer bugs - implemented level 2 patterns grammar --- helm/ocaml/cic_notation/cicNotationLexer.ml | 9 +- helm/ocaml/cic_notation/cicNotationParser.ml | 172 +++++++++++++++---- 2 files changed, 151 insertions(+), 30 deletions(-) diff --git a/helm/ocaml/cic_notation/cicNotationLexer.ml b/helm/ocaml/cic_notation/cicNotationLexer.ml index 306cf45b9..9721ee805 100644 --- a/helm/ocaml/cic_notation/cicNotationLexer.ml +++ b/helm/ocaml/cic_notation/cicNotationLexer.ml @@ -40,6 +40,12 @@ let regexp keyword = '"' ident '"' let regexp implicit = '?' let regexp meta = implicit number +let regexp uri = + ("cic:/" | "theory:/") (* schema *) + ident ('/' ident)* (* path *) + ('.' ident)+ (* ext *) + ("#xpointer(" number ('/' number)+ ")")? (* xpointer *) + let error lexbuf msg = let begin_cnum, end_cnum = Ulexing.loc lexbuf in raise (Error (begin_cnum, end_cnum, msg)) @@ -105,13 +111,14 @@ let rec level1_token = lexer (** {2 Level 2 lexer} *) let rec level2_token = lexer - | xml_blank+ -> level1_token lexbuf + | xml_blank+ -> level2_token lexbuf | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) | implicit -> return lexbuf ("IMPLICIT", Ulexing.utf8_lexeme lexbuf) | ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) | keyword -> return lexbuf (keyword lexbuf) | tex_token -> return lexbuf (expand_macro lexbuf) + | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf) | eof -> return lexbuf ("EOI", "") | _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) diff --git a/helm/ocaml/cic_notation/cicNotationParser.ml b/helm/ocaml/cic_notation/cicNotationParser.ml index ce9049005..bc33a815d 100644 --- a/helm/ocaml/cic_notation/cicNotationParser.ml +++ b/helm/ocaml/cic_notation/cicNotationParser.ml @@ -60,50 +60,54 @@ let int_of_string s = GEXTEND Level1Parser GLOBAL: level1_pattern; level1_pattern: [ [ p = pattern -> () ] ]; - pattern: [ [ p = LIST1 simple_pattern -> () ] ]; - literal: [ [ s = SYMBOL -> () | k = KEYWORD -> () ] ]; - sep: [ [ SYMBOL "\\SEP"; sep = literal -> () ] ]; row_sep: [ [ SYMBOL "\\ROWSEP"; sep = literal -> () ] ]; field_sep: [ [ SYMBOL "\\FIELDSEP"; sep = literal -> () ] ]; - - box_token: [ + box_pattern: [ [ SYMBOL "\\HBOX"; p = simple_pattern -> () | SYMBOL "\\VBOX"; p = simple_pattern -> () | SYMBOL "\\BREAK" -> () ] ]; - + magic_pattern: [ + [ SYMBOL "\\LIST0"; p = simple_pattern; sep = OPT sep -> () + | SYMBOL "\\LIST1"; p = simple_pattern; sep = OPT sep -> () + | SYMBOL "\\OPT"; p = simple_pattern -> () + ] + ]; + pattern_variable: [ + [ id = IDENT -> () + | SYMBOL "\\NUM"; id = IDENT -> () + | SYMBOL "\\IDENT"; id = IDENT -> () + ] + ]; simple_pattern: - [ "infix" LEFTA - (* TODO XXX many issues here: - * - "^^" is lexed as two "^" symbols - * - "a_b" is lexed as IDENT "a_b" *) - [ p1 = SELF; SYMBOL "^"; p2 = SELF -> () - | p1 = SELF; SYMBOL "^"; SYMBOL "^"; p2 = SELF -> () - | p1 = SELF; SYMBOL "_"; p2 = SELF -> () - | p1 = SELF; SYMBOL "_"; SYMBOL "_"; p2 = SELF -> () - ] - | "simple" NONA - [ SYMBOL "\\LIST0"; p = SELF; sep = OPT sep -> () - | SYMBOL "\\LIST1"; p = SELF; sep = OPT sep -> () - | b = box_token -> () - | id = IDENT -> () - | SYMBOL "\\NUM"; id = IDENT -> () - | SYMBOL "\\IDENT"; id = IDENT -> () - | SYMBOL "\\OPT"; p = SELF -> () + [ "layout" LEFTA + [ p1 = SELF; SYMBOL "\\SUB"; p2 = SELF -> () + | p1 = SELF; SYMBOL "\\SUP"; p2 = SELF -> () + | p1 = SELF; SYMBOL "\\BELOW"; p2 = SELF -> () + | p1 = SELF; SYMBOL "\\ABOVE"; p2 = SELF -> () + | SYMBOL "["; p1 = pattern; SYMBOL "\\OVER"; p2 = pattern; SYMBOL "]" -> + () + | SYMBOL "["; p1 = pattern; SYMBOL "\\ATOP"; p2 = pattern; SYMBOL "]" -> + () | SYMBOL "\\ARRAY"; p = SELF; fsep = OPT field_sep; rsep = OPT row_sep -> () | SYMBOL "\\FRAC"; p1 = SELF; p2 = SELF -> () | SYMBOL "\\SQRT"; p = SELF -> () - | SYMBOL "\\ROOT"; p1 = SELF; SYMBOL "\\OF"; p2 = SELF -> - () + | SYMBOL "\\ROOT"; arg = pattern; SYMBOL "\\OF"; index = SELF -> () + ] + | "simple" NONA + [ m = magic_pattern -> () + | v = pattern_variable -> () + | b = box_pattern -> () + | n = NUMBER -> () | SYMBOL "["; p = pattern; SYMBOL "]" -> () ] ]; @@ -114,10 +118,120 @@ END GEXTEND Level2Parser GLOBAL: level2_pattern; level2_pattern: [ [ p = pattern -> () ] ]; - - pattern: [ [ a = ast -> () ] ]; - - ast: [ [ SYMBOL "\\FOO" -> () ] ]; + sort: [ + [ SYMBOL "\\PROP" -> () + | SYMBOL "\\SET" -> () + | SYMBOL "\\TYPE" -> () + ] + ]; + explicit_subst: [ + [ (* TODO explicit substitution *) + ] + ]; + meta_subst: [ + [ (* TODO meta substitution *) + ] + ]; + possibly_typed_name: [ + [ SYMBOL "("; i = IDENT; SYMBOL ":"; typ = pattern; SYMBOL ")" -> () + | i = IDENT -> () + ] + ]; + match_pattern: [ + [ n = IDENT -> () + | SYMBOL "("; head = IDENT; vars = LIST1 possibly_typed_name; SYMBOL ")" -> + () + ] + ]; + binder: [ + [ SYMBOL <:unicode> (* Π *) -> () + | SYMBOL <:unicode> (* ∃ *) -> () + | SYMBOL <:unicode> (* ∀ *) -> () + | SYMBOL <:unicode> (* λ *) -> () + ] + ]; + bound_names: [ + [ vars = LIST1 IDENT SEP SYMBOL ","; + typ = OPT [ SYMBOL ":"; p = pattern -> () ] -> () + | LIST1 [ + SYMBOL "("; vars = LIST1 IDENT SEP SYMBOL ","; + typ = OPT [ SYMBOL ":"; p = pattern -> () ]; SYMBOL ")" -> () + ] -> + () + ] + ]; + induction_kind: [ + [ IDENT "rec" -> () + | IDENT "corec" -> () + ] + ]; + let_defs: [ + [ defs = LIST1 [ + name = IDENT; args = bound_names; + index_name = OPT [ IDENT "on"; idx = IDENT -> () ]; + ty = OPT [ SYMBOL ":" ; p = pattern -> () ]; + SYMBOL <:unicode> (* ≝ *); body = pattern -> + () + ] SEP IDENT "and" -> + () + ] + ]; + pattern_variable: [ + [ SYMBOL "\\NUM"; id = IDENT -> () + | SYMBOL "\\IDENT"; id = IDENT -> () + | SYMBOL "\\FRESH"; id = IDENT -> () + ] + ]; + magic_pattern: [ + [ SYMBOL "\\FOLD"; n = OPT NUMBER; [ IDENT "left" | IDENT "right" ]; + LIST1 IDENT; SYMBOL "."; p1 = pattern; + OPT [ SYMBOL "\\LAMBDA"; LIST1 IDENT ]; p2 = pattern -> + () + | SYMBOL "\\DEFAULT"; id = IDENT; p1 = pattern; p2 = pattern -> () + ] + ]; + pattern: + [ "letin" NONA + [ IDENT "let"; var = possibly_typed_name; SYMBOL <:unicode> (* ≝ *); + p1 = pattern; "in"; p2 = pattern -> + () + | IDENT "let"; k = induction_kind; defs = let_defs; IDENT "in"; + body = pattern -> + () + ] + | "binder" RIGHTA + [ b = binder; bound_names; SYMBOL "."; body = pattern -> () ] + | "extension" + [ ] + | "apply" LEFTA + [ p1 = pattern; p2 = pattern -> () ] + | "simple" NONA + [ i = IDENT -> () + | i = IDENT; s = explicit_subst -> () + | n = NUMBER -> () + | IMPLICIT -> () + | m = META -> () + | m = META; s = meta_subst -> () + | s = sort -> () + | s = SYMBOL -> () + | u = URI -> () + | outtyp = OPT [ SYMBOL "["; typ = pattern; SYMBOL "]" ]; + IDENT "match"; t = pattern; + indty_ident = OPT [ SYMBOL ":"; id = IDENT ]; + IDENT "with"; SYMBOL "["; + patterns = LIST0 [ + lhs = match_pattern; SYMBOL <:unicode> (* ⇒ *); + rhs = pattern -> + () + ] SEP SYMBOL "|"; + SYMBOL "]" -> + () + | SYMBOL "("; p1 = pattern; SYMBOL ":"; p2 = pattern; SYMBOL ")" -> () + | SYMBOL "("; p = pattern; SYMBOL ")" -> () + | v = pattern_variable -> () + | m = magic_pattern -> () + ] + ]; END (* }}} *) -- 2.39.2