From: Stefano Zacchiroli Date: Mon, 15 Dec 2003 23:00:39 +0000 (+0000) Subject: new experimental cic textual parser: checkin X-Git-Tag: V_0_2_2~14 X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=commitdiff_plain;h=0148419c577eab74538b8e2564a64e399d8bdd65;p=helm.git new experimental cic textual parser: checkin --- diff --git a/helm/ocaml/cic_disambiguation/.cvsignore b/helm/ocaml/cic_disambiguation/.cvsignore new file mode 100644 index 000000000..b94ae04d1 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/.cvsignore @@ -0,0 +1,7 @@ +*.cma +*.cmo +*.cmi +*.cmx +*.cmxa +test_lexer +test_parser diff --git a/helm/ocaml/cic_disambiguation/.depend b/helm/ocaml/cic_disambiguation/.depend new file mode 100644 index 000000000..16d57692a --- /dev/null +++ b/helm/ocaml/cic_disambiguation/.depend @@ -0,0 +1,9 @@ +pp.cmi: ast.cmi +pp.cmo: ast.cmi pp.cmi +pp.cmx: ast.cmi pp.cmi +macro.cmo: macro.cmi +macro.cmx: macro.cmi +lexer.cmo: macro.cmi lexer.cmi +lexer.cmx: macro.cmx lexer.cmi +parser.cmo: ast.cmi lexer.cmi +parser.cmx: ast.cmi lexer.cmx diff --git a/helm/ocaml/cic_disambiguation/Makefile b/helm/ocaml/cic_disambiguation/Makefile new file mode 100644 index 000000000..4827da666 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/Makefile @@ -0,0 +1,47 @@ + +PACKAGE = cic_textual_parser2 +REQUIRES = ulex camlp4 pxp +INTERFACE_FILES = ast.mli pp.mli macro.mli lexer.mli +NOTATIONS = logic arit +IMPLEMENTATION_FILES = \ + pp.ml macro.ml lexer.ml parser.ml \ + $(patsubst %,%_notation.ml,$(NOTATIONS)) + +ULEXDIR := $(shell ocamlfind query ulex) + +LEXER_P4_OPTS = -I $(ULEXDIR) pa_ulex.cma +PARSER_P4_OPTS = pa_extend.cmo ./macro.cmo ./pa_unicode_macro.cmo +PA_P4_OPTS = q_MLast.cmo pa_extend.cmo + +include ../Makefile.common + +lexer.cmo: lexer.ml + $(OCAMLC) -pp "camlp4o $(LEXER_P4_OPTS)" -c $< +parser.cmo: parser.ml macro.cmo pa_unicode_macro.cmo + $(OCAMLC) -pp "camlp4o $(PARSER_P4_OPTS)" -c $< + +%_notation.cmo: %_notation.ml parser.cmo + $(OCAMLC) -pp "camlp4o $(PARSER_P4_OPTS)" -c $< + +pa_unicode_macro.cmo: pa_unicode_macro.ml macro.cmo + $(OCAMLC) -pp "camlp4o $(PA_P4_OPTS)" -c $< + +LOCAL_LINKOPTS = -linkpkg gramlib.cma $(PACKAGE).cma +test: test_lexer test_parser +test_lexer: test_lexer.ml $(PACKAGE).cma + $(OCAMLC) $(LOCAL_LINKOPTS) -o $@ $< +test_parser: test_parser.ml $(PACKAGE).cma + $(OCAMLC) $(LOCAL_LINKOPTS) -o $@ $< +make_table: make_table.ml + $(OCAMLC) -linkpkg -o $@ $< + +.PHONY: macro_table.dump +macro_table.dump: make_table + ./make_table $@ + +clean: extra_clean +distclean: extra_clean + rm -f macro_table.dump +extra_clean: + rm -f test_lexer test_parser make_table + diff --git a/helm/ocaml/cic_disambiguation/arit_notation.ml b/helm/ocaml/cic_disambiguation/arit_notation.ml new file mode 100644 index 000000000..522021f5f --- /dev/null +++ b/helm/ocaml/cic_disambiguation/arit_notation.ml @@ -0,0 +1,27 @@ +open Ast +open Parser + +EXTEND + term: LEVEL "add" + [ + [ t1 = term; SYMBOL "+"; t2 = term -> + return_term loc (Appl [Ident ("plus", []); t1; t2]) + | t1 = term; SYMBOL "-"; t2 = term -> + return_term loc (Appl [Ident ("minus", []); t1; t2]) + ] + ]; + term: LEVEL "mult" + [ + [ t1 = term; SYMBOL "*"; t2 = term -> + return_term loc (Appl [Ident ("times", []); t1; t2]) + | t1 = term; SYMBOL "/"; t2 = term -> + return_term loc (Appl [Ident ("div", []); t1; t2]) + ] + ]; + term: LEVEL "inv" + [ + [ SYMBOL "-"; t = term -> + return_term loc (Appl [Ident ("uminus", []); t]) + ] + ]; +END diff --git a/helm/ocaml/cic_disambiguation/ast.mli b/helm/ocaml/cic_disambiguation/ast.mli new file mode 100644 index 000000000..d579ccdd3 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/ast.mli @@ -0,0 +1,107 @@ + + (* when an 'ident option is None, the default is to apply the tactic + to the current goal *) + +type reduction_kind = [ `Reduce | `Simpl | `Whd ] + +type 'term pattern = + | Pattern of 'term + +type location = int * int + +type ('term, 'ident) tactic = + | LocatedTactic of location * ('term, 'ident) tactic + + | Absurd + | Apply of 'term + | Assumption + | Change of 'term * 'term * 'ident option (* what, with what, where *) + | Change_pattern of 'term pattern * 'term * 'ident option + (* what, with what, where *) + | Contradiction + | Cut of 'term + | Decompose of 'ident * 'ident list (* which hypothesis, which principles *) + | Discriminate of 'ident + | Elim of 'term * 'term option (* what to elim, which principle to use *) + | ElimType of 'term + | Exact of 'term + | Exists + | Fold of reduction_kind * 'term + | Fourier + | Injection of 'ident + | Intros of int option + | Left + | LetIn of 'term * 'ident + | Named_intros of 'ident list + | Reduce of reduction_kind * 'term pattern * 'ident option (* what, where *) + | Reflexivity + | Replace of 'term * 'term (* what, with what *) + | Replace_pattern of 'term pattern * 'term + | RewriteLeft of 'term * 'ident option + | RewriteRight of 'term * 'ident option + | Right + | Ring + | Split + | Symmetry + | Transitivity of 'term + +type 'tactic tactical = + | LocatedTactical of location * 'tactic tactical + + | Fail + | For of int * 'tactic tactical + | IdTac + | Repeat of 'tactic tactical + | Seq of 'tactic tactical list (* sequential composition *) + | Tactic of 'tactic + | Then of 'tactic tactical * 'tactic tactical list + | Tries of 'tactic tactical list + (* try a sequence of tacticals until one succeeds, fail otherwise *) + | Try of 'tactic tactical (* try a tactical and mask failures *) + +type binder_kind = [ `Lambda | `Pi | `Exists | `Forall ] + +type case_pattern = string list + +type term = + | LocatedTerm of location * term + + | Appl of term list + | Binder of binder_kind * string * term option * term + (* kind, name, type, body *) + | Case of term * term option * (case_pattern * term) list + (* what to match, case type, list *) + | LetIn of string * term * term (* name, body, where *) + | LetRec of (string * term * term option * int) list * term + (* (name, body, type, decreasing argument) list, where *) + | Ident of string * subst list + | Meta of string * meta_subst list + | Int of int + +and meta_subst = term option +and subst = string * term + +(* +type cexpr = + | Symbol of string option * string * (subst option) * string option + (* h:xref, name, subst, definitionURL *) + | LocalVar of string option * string (* h:xref, name *) + | Meta of string option * string * meta_subst (* h:xref, name, meta_subst *) + | Num of string option * string (* h:xref, value *) + | Appl of string option * cexpr list (* h:xref, args *) + | Binder of string option *string * decl * cexpr + (* h:xref, name, decl, body *) + | Letin of string option * def * cexpr (* h:xref, def, body *) + | Letrec of string option * def list * cexpr (* h:xref, def list, body *) + | Case of string option * cexpr * ((string * cexpr) list) + (* h:xref, case_expr, named-pattern list *) +and + decl = string * cexpr (* name, type *) +and + def = string * cexpr (* name, body *) +and + subst = (UriManager.uri * cexpr) list +and + meta_subst = cexpr option list +*) + diff --git a/helm/ocaml/cic_disambiguation/lexer.ml b/helm/ocaml/cic_disambiguation/lexer.ml new file mode 100644 index 000000000..79003792b --- /dev/null +++ b/helm/ocaml/cic_disambiguation/lexer.ml @@ -0,0 +1,71 @@ + +exception Error of int * int * string + +let regexp alpha = [ 'a' - 'z' 'A' - 'Z' ] +let regexp digit = [ '0' - '9' ] +let regexp blank = [ ' ' '\t' '\n' ] + +let regexp blanks = blank+ +let regexp num = digit+ +let regexp ident = alpha (alpha | num)* +let regexp symbol = [^ 'a' - 'z' 'A' - 'Z' '0' - '9' ' ' '\t' '\n' ] +let regexp tex_token = '\\' alpha+ +let regexp lparen = [ '(' '[' '{' ] +let regexp rparen = [ ')' ']' '}' ] +let regexp meta = '?' (alpha | num)+ + +let keywords = Hashtbl.create 17 +let _ = + List.iter (fun keyword -> Hashtbl.add keywords keyword ("", keyword)) + [ "Prop"; "Type"; "Set"; "let"; "rec"; "using"; "match"; "with" ] + +let error lexbuf msg = + raise (Error (Ulexing.lexeme_start lexbuf, Ulexing.lexeme_end lexbuf, msg)) +let error_at_end lexbuf msg = + raise (Error (Ulexing.lexeme_end lexbuf, Ulexing.lexeme_end lexbuf, msg)) + +let return lexbuf token = (token, Ulexing.loc lexbuf) + +let rec token = lexer +| blanks -> token lexbuf +| ident -> + let lexeme = Ulexing.utf8_lexeme lexbuf in + (try + return lexbuf (Hashtbl.find keywords lexeme) + with Not_found -> + return lexbuf ("IDENT", lexeme)) +| num -> return lexbuf ("INT", Ulexing.utf8_lexeme lexbuf) +| lparen -> return lexbuf ("LPAREN", Ulexing.utf8_lexeme lexbuf) +| rparen -> return lexbuf ("RPAREN", Ulexing.utf8_lexeme lexbuf) +| meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) +| symbol -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) +| tex_token -> + let macro = + Ulexing.utf8_sub_lexeme lexbuf 1 (Ulexing.lexeme_length lexbuf - 1) + in + (try + return lexbuf ("SYMBOL", Macro.expand macro) + with Macro.Macro_not_found _ -> + return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)) +| eof -> return lexbuf ("EOI", "") +| _ -> error lexbuf "Invalid character" + +let tok_func stream = + let lexbuf = Ulexing.from_utf8_stream stream in + Token.make_stream_and_location + (fun () -> + try + token lexbuf + with + | Ulexing.Error -> error_at_end lexbuf "Unexpected character" + | Ulexing.InvalidCodepoint i -> error_at_end lexbuf "Invalid code point") + +let lex = + { + Token.tok_func = tok_func; + Token.tok_using = (fun _ -> ()); + Token.tok_removing = (fun _ -> ()); + Token.tok_match = Token.default_match; + Token.tok_text = Token.lexer_text; + Token.tok_comm = None; + } diff --git a/helm/ocaml/cic_disambiguation/lexer.mli b/helm/ocaml/cic_disambiguation/lexer.mli new file mode 100644 index 000000000..f8fe7c955 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/lexer.mli @@ -0,0 +1,5 @@ + +exception Error of int * int * string + +val lex : (string * string) Token.glexer + diff --git a/helm/ocaml/cic_disambiguation/logic_notation.ml b/helm/ocaml/cic_disambiguation/logic_notation.ml new file mode 100644 index 000000000..35ce4b90c --- /dev/null +++ b/helm/ocaml/cic_disambiguation/logic_notation.ml @@ -0,0 +1,23 @@ +open Ast +open Parser + +EXTEND + term: LEVEL "add" + [ + [ t1 = term; SYMBOL "∨"; t2 = term -> + return_term loc (Appl [Ident ("or", []); t1; t2]) + ] + ]; + term: LEVEL "mult" + [ + [ t1 = term; SYMBOL "∧"; t2 = term -> + return_term loc (Appl [Ident ("and", []); t1; t2]) + ] + ]; + term: LEVEL "inv" + [ + [ SYMBOL "¬"; t = term -> + return_term loc (Appl [Ident ("not", []); t]) + ] + ]; +END diff --git a/helm/ocaml/cic_disambiguation/macro.ml b/helm/ocaml/cic_disambiguation/macro.ml new file mode 100644 index 000000000..fd93aa9ae --- /dev/null +++ b/helm/ocaml/cic_disambiguation/macro.ml @@ -0,0 +1,28 @@ + +exception Macro_not_found of string +exception Utf8_not_found of string + +let dump_file = "macro_table.dump" + +let init () = + let ic = open_in dump_file in + let (macro2utf8, utf82macro): + ((string, string) Hashtbl.t * (string, string) Hashtbl.t) + = + Marshal.from_channel ic + in + close_in ic; + (macro2utf8, utf82macro) + +let (macro2utf8, utf82macro) = init () + +let expand macro = + try + Hashtbl.find macro2utf8 macro + with Not_found -> raise (Macro_not_found macro) + +let contract utf8 = + try + Hashtbl.find utf82macro utf8 + with Not_found -> raise (Utf8_not_found utf8) + diff --git a/helm/ocaml/cic_disambiguation/macro.mli b/helm/ocaml/cic_disambiguation/macro.mli new file mode 100644 index 000000000..5f8cf5994 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/macro.mli @@ -0,0 +1,10 @@ + +exception Macro_not_found of string +exception Utf8_not_found of string + + (* @param macro name + @return utf8 string *) +val expand: string -> string + +val contract: string -> string + diff --git a/helm/ocaml/cic_disambiguation/macro_table.dump b/helm/ocaml/cic_disambiguation/macro_table.dump new file mode 100644 index 000000000..627b86b2a Binary files /dev/null and b/helm/ocaml/cic_disambiguation/macro_table.dump differ diff --git a/helm/ocaml/cic_disambiguation/macros/dictionary-tex.xml b/helm/ocaml/cic_disambiguation/macros/dictionary-tex.xml new file mode 100644 index 000000000..47995454f --- /dev/null +++ b/helm/ocaml/cic_disambiguation/macros/dictionary-tex.xml @@ -0,0 +1,378 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/helm/ocaml/cic_disambiguation/macros/entities-table.xml b/helm/ocaml/cic_disambiguation/macros/entities-table.xml new file mode 100644 index 000000000..ca0bdabcf --- /dev/null +++ b/helm/ocaml/cic_disambiguation/macros/entities-table.xml @@ -0,0 +1,2081 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/helm/ocaml/cic_disambiguation/macros/extra-entities.xml b/helm/ocaml/cic_disambiguation/macros/extra-entities.xml new file mode 100644 index 000000000..991c2d84b --- /dev/null +++ b/helm/ocaml/cic_disambiguation/macros/extra-entities.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/helm/ocaml/cic_disambiguation/make_table.ml b/helm/ocaml/cic_disambiguation/make_table.ml new file mode 100644 index 000000000..770fe2b05 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/make_table.ml @@ -0,0 +1,88 @@ + +open Printf +open Pxp_types +open Pxp_ev_parser + +(* Usage: make_table *) + +let debug = false +let debug_print s = if debug then prerr_endline s + +let tables = [ +(* + `Entities, "/usr/share/gtkmathview/entities-table.xml"; + `Dictionary, "/usr/share/editex/dictionary-tex.xml" +*) + `Entities, "macros/entities-table.xml"; + `Dictionary, "macros/dictionary-tex.xml"; + `Entities, "macros/extra-entities.xml"; +] + +let macro2utf8 = Hashtbl.create 2000 +let utf82macro = Hashtbl.create 2000 + +let add_macro macro utf8 = + debug_print (sprintf "Adding macro %s = '%s'" macro utf8); + Hashtbl.add macro2utf8 macro utf8; + Hashtbl.add utf82macro utf8 macro + +let rec find_first_tag pull_parser = + match pull_parser () with + | Some (E_start_tag _ as e) -> e + | None -> assert false + | _ -> find_first_tag pull_parser + +let iter_entities_file f pull_parser = + ignore (find_first_tag pull_parser); (* *) + let rec aux () = + match pull_parser () with + | Some (E_start_tag ("entity", attrs, _)) -> + (try + let name = List.assoc "name" attrs in + let value = List.assoc "value" attrs in + f name value + with Not_found -> ()); + aux () + | None -> () + | _ -> aux () + in + aux () + +let iter_dictionary_file f pull_parser = + ignore (find_first_tag pull_parser); (* *) + let rec aux () = + match pull_parser () with + | Some (E_start_tag ("entry", attrs, _)) -> + (try + let name = List.assoc "name" attrs in + let value = List.assoc "val" attrs in + f name value + with Not_found -> ()); + aux () + | None -> () + | _ -> aux () + in + aux () + +let fill_table () = + List.iter + (fun (typ, fname) -> + let entry = `Entry_document [ `Extend_dtd_fully; `Parse_xml_decl ] in + let config = { default_config with encoding = `Enc_utf8 } in + let entity_manager = + create_entity_manager ~is_document:true config (from_file fname) + in + let pull_parser = create_pull_parser config entry entity_manager in + match typ with + | `Entities -> iter_entities_file add_macro pull_parser + | `Dictionary -> iter_dictionary_file add_macro pull_parser) + tables + +let main () = + let oc = open_out Sys.argv.(1) in + fill_table (); + Marshal.to_channel oc (macro2utf8, utf82macro) []; + close_out oc + +let _ = main () + diff --git a/helm/ocaml/cic_disambiguation/pa_unicode_macro.ml b/helm/ocaml/cic_disambiguation/pa_unicode_macro.ml new file mode 100644 index 000000000..044f8ae02 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/pa_unicode_macro.ml @@ -0,0 +1,36 @@ + +let debug = false +let debug_print = if debug then prerr_endline else ignore + +let loc = (0, 0) + +let expand_unicode_macro macro = + debug_print (Printf.sprintf "Expanding macro '%s' ..." macro); + let expansion = Macro.expand macro in + <:expr< $str:expansion$ >> + +let _ = + Quotation.add "unicode" + (Quotation.ExAst (expand_unicode_macro, (fun _ -> assert false))) + +open Pa_extend + +EXTEND + symbol: FIRST + [ + [ x = UIDENT; q = QUOTATION -> + let (quotation, arg) = + let pos = String.index q ':' in + (String.sub q 0 pos, + String.sub q (pos + 1) (String.length q - pos - 1)) + in + debug_print (Printf.sprintf "QUOTATION = %s; ARG = %s" quotation arg); + if quotation = "unicode" then + let text = TXtok (loc, x, expand_unicode_macro arg) in + {used = []; text = text; styp = STlid (loc, "string")} + else + assert false + ] + ]; +END + diff --git a/helm/ocaml/cic_disambiguation/parser.ml b/helm/ocaml/cic_disambiguation/parser.ml new file mode 100644 index 000000000..0ee41ab2a --- /dev/null +++ b/helm/ocaml/cic_disambiguation/parser.ml @@ -0,0 +1,102 @@ + +open Ast + +let grammar = Grammar.gcreate Lexer.lex + +let term = Grammar.Entry.create grammar "term" +(* let tactic = Grammar.Entry.create grammar "tactic" *) +(* let tactical = Grammar.Entry.create grammar "tactical" *) + +let return_term loc term = LocatedTerm (loc, term) + +EXTEND + GLOBAL: term; + meta_subst: [ + [ s = SYMBOL "_" -> None + | t = term -> Some t ] + ]; + binder: [ + [ SYMBOL <:unicode> (* λ *) -> `Lambda + | SYMBOL <:unicode> (* π *) -> `Pi + | SYMBOL <:unicode> (* ∃ *) -> `Exists + | SYMBOL <:unicode> (* ∀ *) -> `Forall + ] + ]; + substituted_name: [ (* a subs.name is an explicit substitution subject *) + [ s = [ IDENT | SYMBOL ]; + subst = OPT [ + SYMBOL "\subst"; (* to avoid catching frequent "a [1]" cases *) + LPAREN "["; + substs = LIST1 [ + i = IDENT; SYMBOL <:unicode> (* ≔ *); t = term -> (i, t) + ] SEP SYMBOL ";"; + RPAREN "]" -> + substs + ] -> + (match subst with + | Some l -> Ident (s, l) + | None -> Ident (s, [])) + ] + ]; + name: [ (* as substituted_name with no explicit substitution *) + [ s = [ IDENT | SYMBOL ] -> s ] + ]; + pattern: [ + [ n = name -> [n] + | LPAREN "("; names = LIST1 name; RPAREN ")" -> names ] + ]; + term: + [ "add" LEFTA [ (* nothing here by default *) ] + | "mult" LEFTA [ (* nothing here by default *) ] + | "inv" NONA [ (* nothing here by default *) ] + | "simple" NONA + [ + b = binder; vars = LIST1 IDENT SEP SYMBOL ","; + typ = OPT [ SYMBOL ":"; t = term -> t ]; + SYMBOL "."; body = term -> + let binder = + List.fold_right (fun var body -> Binder (b, var, typ, body)) + vars body + in + return_term loc binder + | n = substituted_name -> return_term loc n + | LPAREN "("; head = term; args = LIST1 term; RPAREN ")" -> + return_term loc (Appl (head :: args)) + | i = INT -> return_term loc (Int (int_of_string i)) + | m = META; + substs = [ + LPAREN "["; substs = LIST0 meta_subst SEP SYMBOL ";" ; RPAREN "]" -> + substs + ] -> + return_term loc (Meta (m, substs)) + (* actually "in" and "and" are _not_ keywords. Parsing works anyway + * since applications are required to be bound by parens *) + | "let"; name = IDENT; SYMBOL <:unicode> (* ≝ *); t1 = term; + IDENT "in"; t2 = term -> + return_term loc (LetIn (name, t1, t2)) + | "let"; "rec"; defs = LIST1 [ + name = IDENT; + index = OPT [ LPAREN "("; index = INT; RPAREN ")" -> + int_of_string index + ]; + typ = OPT [ SYMBOL ":"; typ = term -> typ ]; + SYMBOL <:unicode> (* ≝ *); t1 = term -> + (name, t1, typ, (match index with None -> 1 | Some i -> i)) + ] SEP (IDENT "and"); + IDENT "in"; body = term -> + return_term loc (LetRec (defs, body)) + | typ = OPT [ LPAREN "["; typ = term; RPAREN "]" -> typ ]; + "match"; t = term; "with"; + LPAREN "["; + patterns = LIST0 [ + p = pattern; SYMBOL <:unicode> (* ⇒*); t = term -> (p, t) + ] SEP SYMBOL "|"; + RPAREN "]" -> + return_term loc (Case (t, typ, patterns)) + | LPAREN "("; t = term; RPAREN ")" -> return_term loc t + ] + ]; +END + +let parse_term = Grammar.Entry.parse term + diff --git a/helm/ocaml/cic_disambiguation/pp.ml b/helm/ocaml/cic_disambiguation/pp.ml new file mode 100644 index 000000000..6ee9cf39a --- /dev/null +++ b/helm/ocaml/cic_disambiguation/pp.ml @@ -0,0 +1,56 @@ + +open Ast +open Printf + +let pp_binder = function + | `Lambda -> "lambda" + | `Pi -> "pi" + | `Exists -> "exists" + | `Forall -> "forall" + +let rec pp_term = function + | LocatedTerm ((p_begin, p_end), term) -> + sprintf "[%d,%d]%s" p_begin p_end (pp_term term) + + | Appl terms -> sprintf "(%s)" (String.concat " " (List.map pp_term terms)) + | Binder (kind, var, typ, body) -> + sprintf "\\%s %s%s.%s" (pp_binder kind) var + (match typ with None -> "" | Some typ -> ": " ^ pp_term typ) + (pp_term body) + | Case (term, typ, patterns) -> + sprintf "%smatch %s with %s" + (match typ with None -> "" | Some t -> sprintf "<%s>" (pp_term t)) + (pp_term term) (pp_patterns patterns) + | LetIn (name, t1, t2) -> + sprintf "let %s = %s in %s" name (pp_term t1) (pp_term t2) + | LetRec (definitions, term) -> + sprintf "let rec %s in %s" + (String.concat " and " + (List.map + (fun (name, body, typ, index) -> + sprintf "%s%s = %s" name + (match typ with None -> "" | Some typ -> ": " ^ pp_term typ) + (pp_term body)) + definitions)) + (pp_term term) + | Ident (name, []) -> name + | Ident (name, substs) -> sprintf "%s[%s]" name (pp_substs substs) + | Meta (name, substs) -> + sprintf "%s[%s]" name + (String.concat "; " + (List.map (function None -> "_" | Some term -> pp_term term) substs)) + | Int num -> string_of_int num + +and pp_subst (name, term) = sprintf "%s := %s" name (pp_term term) +and pp_substs substs = String.concat "; " (List.map pp_subst substs) + +and pp_pattern (names, term) = + sprintf "%s -> %s" + (match names with + | [n] -> n + | names -> "(" ^ String.concat " " names ^ ")") + (pp_term term) + +and pp_patterns patterns = + sprintf "[%s]" (String.concat " | " (List.map pp_pattern patterns)) + diff --git a/helm/ocaml/cic_disambiguation/pp.mli b/helm/ocaml/cic_disambiguation/pp.mli new file mode 100644 index 000000000..078517c56 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/pp.mli @@ -0,0 +1 @@ +val pp_term: Ast.term -> string diff --git a/helm/ocaml/cic_disambiguation/test_lexer.ml b/helm/ocaml/cic_disambiguation/test_lexer.ml new file mode 100644 index 000000000..8667953c3 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/test_lexer.ml @@ -0,0 +1,11 @@ +let ic = open_in Sys.argv.(1) in +let token_stream = fst (Lexer.lex.Token.tok_func (Stream.of_channel ic)) in +let rec dump () = + let (a,b) = Stream.next token_stream in + if a = "EOI" then raise Stream.Failure; + print_endline (Printf.sprintf "%s '%s'" a b); + dump () +in +try + dump () +with Stream.Failure -> () diff --git a/helm/ocaml/cic_disambiguation/test_parser.ml b/helm/ocaml/cic_disambiguation/test_parser.ml new file mode 100644 index 000000000..41634cbb3 --- /dev/null +++ b/helm/ocaml/cic_disambiguation/test_parser.ml @@ -0,0 +1,13 @@ +try + let ic = open_in Sys.argv.(1) in + let term = Parser.parse_term (Stream.of_channel ic) in + close_in ic; + print_endline (Pp.pp_term term) +with Stdpp.Exc_located ((p_start, p_end), exn) -> + prerr_endline (Printf.sprintf "Exception at character %d-%d: %s" + p_start p_end (Printexc.to_string exn)) + +(* print_endline (Macro.expand "def") *) + +(* Printf.printf "'%s'\n" (Macro.expand Sys.argv.(1)) *) +