From: matitaweb Date: Thu, 1 Mar 2012 17:52:50 +0000 (+0000) Subject: commit by user andrea X-Git-Tag: make_still_working~1922 X-Git-Url: http://matita.cs.unibo.it/gitweb/?p=helm.git;a=commitdiff_plain;h=256ee9a806709bc4bede01cd80a62bbbfde908f3 commit by user andrea --- diff --git a/weblib/tutorial/chapter7.ma b/weblib/tutorial/chapter7.ma index ff1f95993..208db9e3f 100644 --- a/weblib/tutorial/chapter7.ma +++ b/weblib/tutorial/chapter7.ma @@ -1,12 +1,18 @@ -include "lang.ma". +(* We shall apply all the previous machinery to the study of regular languages +and the constructions of the associated finite automata. *) -inductive re (S: DeqSet) : Type[0] ≝ - z: re S - | e: re S - | s: S → re S - | c: re S → re S → re S - | o: re S → re S → re S - | k: re S → re S. +include "tutorial/chapter6.ma". + +(* The type re of regular expressions over an alphabet $S$ is the smallest +collection of objects generated by the following constructors: *) + +inductive re (S: a href="cic:/matita/tutorial/chapter4/DeqSet.ind(1,0,0)"DeqSet/a) : Type[0] ≝ + z: re S (* empty: ∅ *) + | e: re S (* epsilon: ϵ *) + | s: S → re S (* symbol: a *) + | c: re S → re S → re S (* concatenation: e1 · e2 *) + | o: re S → re S → re S (* plus: e1 + e2 *) + | k: re S → re S. (* kleene's star: e* *) interpretation "re epsilon" 'epsilon = (e ?). interpretation "re or" 'plus a b = (o ? a b). @@ -20,34 +26,70 @@ interpretation "atom" 'ps a = (s ? a). notation "`∅" non associative with precedence 90 for @{ 'empty }. interpretation "empty" 'empty = (z ?). -let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝ +(* The language $sem{e}$ associated with the regular expression e is inductively +defined by the following function: *) + +let rec in_l (S : a href="cic:/matita/tutorial/chapter4/DeqSet.ind(1,0,0)"DeqSet/a) (r : a href="cic:/matita/tutorial/chapter7/re.ind(1,0,1)"re/a S) on r : a href="cic:/matita/tutorial/chapter6/word.def(3)"word/a S → Prop ≝ match r with -[ z ⇒ ∅ -| e ⇒ {ϵ} -| s x ⇒ {[x]} -| c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2) -| o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2) -| k r1 ⇒ (in_l ? r1) ^*]. +[ z ⇒ a title="empty set" href="cic:/fakeuri.def(1)"∅/a +| e ⇒ a title="singleton" href="cic:/fakeuri.def(1)"{/aa title="epsilon" href="cic:/fakeuri.def(1)"ϵ/a} +| s x ⇒ a title="singleton" href="cic:/fakeuri.def(1)"{/aa title="cons" href="cic:/fakeuri.def(1)"[/ax]} +| c r1 r2 ⇒ (in_l ? r1) a title="cat lang" href="cic:/fakeuri.def(1)"·/a (in_l ? r2) +| o r1 r2 ⇒ (in_l ? r1) a title="union" href="cic:/fakeuri.def(1)"∪/a (in_l ? r2) +| k r1 ⇒ (in_l ? r1) a title="star lang" href="cic:/fakeuri.def(1)"^/a*]. notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}. interpretation "in_l" 'in_l E = (in_l ? E). interpretation "in_l mem" 'mem w l = (in_l ? l w). -lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*. +lemma rsem_star : ∀S.∀r: a href="cic:/matita/tutorial/chapter7/re.ind(1,0,1)"re/a S. a title="in_l" href="cic:/fakeuri.def(1)"\sem/a{ra title="re star" href="cic:/fakeuri.def(1)"^/a*} a title="leibnitz's equality" href="cic:/fakeuri.def(1)"=/a a title="in_l" href="cic:/fakeuri.def(1)"\sem/a{r}a title="star lang" href="cic:/fakeuri.def(1)"^/a*. // qed. -(* pointed items *) -inductive pitem (S: DeqSet) : Type[0] ≝ - pz: pitem S - | pe: pitem S - | ps: S → pitem S - | pp: S → pitem S - | pc: pitem S → pitem S → pitem S - | po: pitem S → pitem S → pitem S - | pk: pitem S → pitem S. +(* We now introduce pointed regular expressions, that are the main tool we shall +use for the construction of the automaton. +A pointed regular expression is just a regular expression internally labelled +with some additional points. Intuitively, points mark the positions inside the +regular expression which have been reached after reading some prefix of +the input string, or better the positions where the processing of the remaining +string has to be started. Each pointed expression for $e$ represents a state of +the {\em deterministic} automaton associated with $e$; since we obviously have +only a finite number of possible labellings, the number of states of the automaton +is finite. + +Pointed regular expressions provide the tool for an algebraic revisitation of +McNaughton and Yamada's algorithm for position automata, making the proof of its +correctness, that is far from trivial, particularly clear and simple. In particular, +pointed expressions offer an appealing alternative to Brzozowski's derivatives, +avoiding their weakest point, namely the fact of being forced to quotient derivatives +w.r.t. a suitable notion of equivalence in order to get a finite number of states +(that is not essential for recognizing strings, but is crucial for comparing regular +expressions). + +Our main data structure is the notion of pointed item, that is meant whose purpose +is to encode a set of positions inside a regular expression. +The idea of formalizing pointers inside a data type by means of a labelled version +of the data type itself is probably one of the first, major lessons learned in the +formalization of the metatheory of programming languages. For our purposes, it is +enough to mark positions preceding individual characters, so we shall have two kinds +of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *) + +inductive pitem (S: a href="cic:/matita/tutorial/chapter4/DeqSet.ind(1,0,0)"DeqSet/a) : Type[0] ≝ + pz: pitem S (* empty *) + | pe: pitem S (* epsilon *) + | ps: S → pitem S (* symbol *) + | pp: S → pitem S (* pointed sysmbol *) + | pc: pitem S → pitem S → pitem S (* concatenation *) + | po: pitem S → pitem S → pitem S (* plus *) + | pk: pitem S → pitem S. (* kleene's star *) -definition pre ≝ λS.pitem S × bool. +(* A pointed regular expression (pre) is just a pointed item with an additional +boolean, that must be understood as the possibility to have a trailing point at +the end of the expression. As we shall see, pointed regular expressions can be +understood as states of a DFA, and the boolean indicates if +the state is final or not. *) + +definition pre ≝ λS.a href="cic:/matita/tutorial/chapter7/pitem.ind(1,0,1)"pitem/a S a title="Product" href="cic:/fakeuri.def(1)"×/a a href="cic:/matita/basics/bool/bool.ind(1,0,0)"bool/a. interpretation "pitem star" 'star a = (pk ? a). interpretation "pitem or" 'plus a b = (po ? a b). @@ -59,6 +101,10 @@ interpretation "pitem ps" 'ps a = (ps ? a). interpretation "pitem epsilon" 'epsilon = (pe ?). interpretation "pitem empty" 'empty = (pz ?). +(* The carrier $|i|$ of an item i is the regular expression obtained from i by +removing all the points. Similarly, the carrier of a pointed regular expression +is the carrier of its item. *) + let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝ match l with [ pz ⇒ `∅ @@ -82,7 +128,10 @@ lemma erase_plus : ∀S.∀i1,i2:pitem S. lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*. // qed. -(* boolean equality *) +(* Items and pres are a very concrete datatype: they can be effectively compared, +and enumerated. In particular, we can define a boolean equality beqitem and +\vebeqitem_true+ enriching the set \verb+(pitem S)+ +to a \verb+DeqSet+. boolean equality *) let rec beqitem S (i1,i2: pitem S) on i1 ≝ match i1 with [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false] @@ -521,5 +570,4 @@ theorem sem_ostar: ∀S.∀e:pre S. @eqP_trans [||@eqP_sym @epsilon_cat_l] @eqP_sym @star_fix_eps |>sem_pre_false >sem_pre_false >sem_star /2/ ] -qed. - +qed. \ No newline at end of file