X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=matita%2Fmatita%2Flib%2Ftutorial%2Fchapter7.ma;h=02afa4186920f6566a2951b0e5ffd03c10df746b;hb=9c0398174ebfa6b483dbdd5c10e8b15e39067329;hp=739afb52df7acd0ad2af9e964f98ce0fc4568daf;hpb=716338697e54d7a7e3602aabdecc2a8a639d683e;p=helm.git diff --git a/matita/matita/lib/tutorial/chapter7.ma b/matita/matita/lib/tutorial/chapter7.ma index 739afb52d..02afa4186 100644 --- a/matita/matita/lib/tutorial/chapter7.ma +++ b/matita/matita/lib/tutorial/chapter7.ma @@ -1,307 +1,199 @@ -(* -Regular Expressions - -We shall apply all the previous machinery to the study of regular languages -and the constructions of the associated finite automata. *) +(* +Formal Languages +In this chapter we shall apply our notion of DeqSet, and the list operations +defined in Chapter 4 to formal languages. A formal language is an arbitrary set +of words over a given alphabet, that we shall represent as a predicate over words. +*) include "tutorial/chapter6.ma". -(* The type re of regular expressions over an alphabet $S$ is the smallest -collection of objects generated by the following constructors: *) +(* A word (or string) over an alphabet S is just a list of elements of S.*) +definition word ≝ λS:DeqSet.list S. -inductive re (S: DeqSet) : Type[0] ≝ - z: re S (* empty: ∅ *) - | e: re S (* epsilon: ϵ *) - | s: S → re S (* symbol: a *) - | c: re S → re S → re S (* concatenation: e1 · e2 *) - | o: re S → re S → re S (* plus: e1 + e2 *) - | k: re S → re S. (* kleene's star: e* *) +(* For any alphabet there is only one word of length 0, the empty word, which is +denoted by ϵ .*) -interpretation "re epsilon" 'epsilon = (e ?). -interpretation "re or" 'plus a b = (o ? a b). -interpretation "re cat" 'middot a b = (c ? a b). -interpretation "re star" 'star a = (k ? a). +notation "ϵ" non associative with precedence 90 for @{ 'epsilon }. +interpretation "epsilon" 'epsilon = (nil ?). -notation < "a" non associative with precedence 90 for @{ 'ps $a}. -notation > "` term 90 a" non associative with precedence 90 for @{ 'ps $a}. -interpretation "atom" 'ps a = (s ? a). +(* The operation that consists in appending two words to form a new word, whose +length is the sum of the lengths of the original words is called concatenation. +String concatenation is just the append operation over lists, hence there is no +point to define it. Similarly, many of its properties, such as the fact that +concatenating a word with the empty word gives the original word, follow by +general results over lists. +*) -notation "`∅" non associative with precedence 90 for @{ 'empty }. -interpretation "empty" 'empty = (z ?). +(* +Operations over languages -(* The language sem{e} associated with the regular expression e is inductively -defined by the following function: *) +Languages inherit all the basic operations for sets, namely union, intersection, +complementation, substraction, and so on. In addition, we may define some new +operations induced by string concatenation, and in particular the concatenation +A · B of two languages A and B, the so called Kleene's star A* of A and the +derivative of a language A w.r.t. a given character a. *) -let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝ -match r with -[ z ⇒ ∅ -| e ⇒ {ϵ} -| s x ⇒ { (x::[]) } -| c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2) -| o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2) -| k r1 ⇒ (in_l ? r1) ^*]. +definition cat : ∀S,l1,l2,w.Prop ≝ + λS.λl1,l2.λw:word S.∃w1,w2.w1 @ w2 = w ∧ l1 w1 ∧ l2 w2. +(* +notation "a · b" non associative with precedence 60 for @{ 'middot $a $b}. +*) +interpretation "cat lang" 'middot a b = (cat ? a b). -notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}. -interpretation "in_l" 'in_l E = (in_l ? E). -interpretation "in_l mem" 'mem w l = (in_l ? l w). +(* Given a language l, the Kleene's star of l, denoted by l*, is the set of +finite-length strings that can be generated by concatenating arbitrary strings of +l. In other words, w belongs to l* is and only if there exists a list of strings +w1,w2,...wk all belonging to l, such that l = w1w2...wk. +We need to define the latter operations. The following flatten function takes in +input a list of words and concatenates them together. *) -lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*. -// qed. +(* Already in the library +let rec flatten (S : DeqSet) (l : list (word S)) on l : word S ≝ +match l with [ nil ⇒ [ ] | cons w tl ⇒ w @ flatten ? tl ]. +*) +(* Given a list of words l and a language r, (conjunct l r) is true if and only +if all words in l are in r, that is for every w in l, r w holds. *) -(* -Pointed Regular expressions - -We now introduce pointed regular expressions, that are the main tool we shall -use for the construction of the automaton. -A pointed regular expression is just a regular expression internally labelled -with some additional points. Intuitively, points mark the positions inside the -regular expression which have been reached after reading some prefix of -the input string, or better the positions where the processing of the remaining -string has to be started. Each pointed expression for $e$ represents a state of -the {\em deterministic} automaton associated with $e$; since we obviously have -only a finite number of possible labellings, the number of states of the automaton -is finite. - -Pointed regular expressions provide the tool for an algebraic revisitation of -McNaughton and Yamada's algorithm for position automata, making the proof of its -correctness, that is far from trivial, particularly clear and simple. In particular, -pointed expressions offer an appealing alternative to Brzozowski's derivatives, -avoiding their weakest point, namely the fact of being forced to quotient derivatives -w.r.t. a suitable notion of equivalence in order to get a finite number of states -(that is not essential for recognizing strings, but is crucial for comparing regular -expressions). - -Our main data structure is the notion of pointed item, that is meant whose purpose -is to encode a set of positions inside a regular expression. -The idea of formalizing pointers inside a data type by means of a labelled version -of the data type itself is probably one of the first, major lessons learned in the -formalization of the metatheory of programming languages. For our purposes, it is -enough to mark positions preceding individual characters, so we shall have two kinds -of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *) - -inductive pitem (S: DeqSet) : Type[0] ≝ - pz: pitem S (* empty *) - | pe: pitem S (* epsilon *) - | ps: S → pitem S (* symbol *) - | pp: S → pitem S (* pointed sysmbol *) - | pc: pitem S → pitem S → pitem S (* concatenation *) - | po: pitem S → pitem S → pitem S (* plus *) - | pk: pitem S → pitem S. (* kleene's star *) - -(* A pointed regular expression (pre) is just a pointed item with an additional -boolean, that must be understood as the possibility to have a trailing point at -the end of the expression. As we shall see, pointed regular expressions can be -understood as states of a DFA, and the boolean indicates if -the state is final or not. *) - -definition pre ≝ λS.pitem S × bool. - -interpretation "pitem star" 'star a = (pk ? a). -interpretation "pitem or" 'plus a b = (po ? a b). -interpretation "pitem cat" 'middot a b = (pc ? a b). -notation < ".a" non associative with precedence 90 for @{ 'pp $a}. -notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}. -interpretation "pitem pp" 'pp a = (pp ? a). -interpretation "pitem ps" 'ps a = (ps ? a). -interpretation "pitem epsilon" 'epsilon = (pe ?). -interpretation "pitem empty" 'empty = (pz ?). - -(* The carrier $|i|$ of an item i is the regular expression obtained from i by -removing all the points. Similarly, the carrier of a pointed regular expression -is the carrier of its item. *) - -let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝ - match l with - [ pz ⇒ z ? (* `∅ *) - | pe ⇒ ϵ - | ps x ⇒ `x - | pp x ⇒ `x - | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2) - | po E1 E2 ⇒ (forget ? E1) + (forget ? E2) - | pk E ⇒ (forget ? E)^* ]. - -interpretation "forget" 'card a = (forget ? a). - -lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|). -// qed. - -lemma erase_plus : ∀S.∀i1,i2:pitem S. - |i1 + i2| = |i1| + |i2|. -// qed. - -lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*. -// qed. +let rec conjunct (S : DeqSet) (l : list (word S)) (r : word S → Prop) on l: Prop ≝ +match l with [ nil ⇒ True | cons w tl ⇒ r w ∧ conjunct ? tl r ]. -(* -Comparing items and pres - -Items and pres are very concrete datatypes: they can be effectively compared, -and enumerated. In particular, we can define a boolean equality beqitem and a proof -beqitem_true that it refects propositional equality, enriching the set (pitem S) -to a DeqSet. *) - -let rec beqitem S (i1,i2: pitem S) on i1 ≝ - match i1 with - [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false] - | pe ⇒ match i2 with [ pe ⇒ true | _ ⇒ false] - | ps y1 ⇒ match i2 with [ ps y2 ⇒ y1==y2 | _ ⇒ false] - | pp y1 ⇒ match i2 with [ pp y2 ⇒ y1==y2 | _ ⇒ false] - | po i11 i12 ⇒ match i2 with - [ po i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22 - | _ ⇒ false] - | pc i11 i12 ⇒ match i2 with - [ pc i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22 - | _ ⇒ false] - | pk i11 ⇒ match i2 with [ pk i21 ⇒ beqitem S i11 i21 | _ ⇒ false] - ]. - -lemma beqitem_true: ∀S,i1,i2. iff (beqitem S i1 i2 = true) (i1 = i2). -#S #i1 elim i1 - [#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct - |#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct - |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct - [>(\P H) // | @(\b (refl …))] - |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct - [>(\P H) // | @(\b (refl …))] - |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % - normalize #H destruct - [cases (true_or_false (beqitem S i11 i21)) #H1 - [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H - |>H1 in H; normalize #abs @False_ind /2/ - ] - |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) // - ] - |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % - normalize #H destruct - [cases (true_or_false (beqitem S i11 i21)) #H1 - [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H - |>H1 in H; normalize #abs @False_ind /2/ - ] - |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) // - ] - |#i3 #Hind #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i4] % - normalize #H destruct - [>(proj1 … (Hind i4) H) // |>(proj2 … (Hind i4) (refl …)) //] - ] -qed. +(* We are ready to give the formal definition of the Kleene's star of l: +a word w belongs to l* is and only if there exists a list of strings +lw such that (conjunct lw l) and l = flatten lw. *) + +definition flatten ≝ λA.foldr (list A) (list A) (append A) []. + +definition star ≝ λS.λl.λw:word S.∃lw.flatten ? lw = w ∧ conjunct ? lw l. -definition DeqItem ≝ λS. - mk_DeqSet (pitem S) (beqitem S) (beqitem_true S). +notation "a ^ *" non associative with precedence 90 for @{ 'star $a}. +interpretation "star lang" 'star l = (star ? l). -(* We also add a couple of unification hints to allow the type inference system -to look at (pitem S) as the carrier of a DeqSet, and at beqitem as if it was the -equality function of a DeqSet. *) +(* The derivative of a language A with respect to a character a is the set of +all strings w such that aw is in A. *) -unification hint 0 ≔ S; - X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S) -(* ---------------------------------------- *) ⊢ - pitem S ≡ carr X. - -unification hint 0 ≔ S,i1,i2; - X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S) -(* ---------------------------------------- *) ⊢ - beqitem S i1 i2 ≡ eqb X i1 i2. +definition deriv ≝ λS.λA:word S → Prop.λa,w. A (a::w). (* -Semantics of pointed regular expressions - -The intuitive semantic of a point is to mark the position where -we should start reading the regular expression. The language associated -to a pre is the union of the languages associated with its points. *) - -let rec in_pl (S : DeqSet) (r : pitem S) on r : word S → Prop ≝ -match r with -[ pz ⇒ ∅ -| pe ⇒ ∅ -| ps _ ⇒ ∅ -| pp x ⇒ { (x::[]) } -| pc r1 r2 ⇒ (in_pl ? r1) · \sem{forget ? r2} ∪ (in_pl ? r2) -| po r1 r2 ⇒ (in_pl ? r1) ∪ (in_pl ? r2) -| pk r1 ⇒ (in_pl ? r1) · \sem{forget ? r1}^* ]. - -interpretation "in_pl" 'in_l E = (in_pl ? E). -interpretation "in_pl mem" 'mem w l = (in_pl ? l w). - -definition in_prl ≝ λS : DeqSet.λp:pre S. - if (\snd p) then \sem{\fst p} ∪ {ϵ} else \sem{\fst p}. +Language equalities + +Equality between languages is just the usual extensional equality between +sets. The operation of concatenation behaves well with respect to this equality. *) + +lemma cat_ext_l: ∀S.∀A,B,C:word S →Prop. + A ≐ C → A · B ≐ C · B. +#S #A #B #C #H #w % * #w1 * #w2 * * #eqw #inw1 #inw2 +cases (H w1) /6/ +qed. + +lemma cat_ext_r: ∀S.∀A,B,C:word S →Prop. + B ≐ C → A · B ≐ A · C. +#S #A #B #C #H #w % * #w1 * #w2 * * #eqw #inw1 #inw2 +cases (H w2) /6/ +qed. -interpretation "in_prl mem" 'mem w l = (in_prl ? l w). -interpretation "in_prl" 'in_l E = (in_prl ? E). - -(* The following, trivial lemmas are only meant for rewriting purposes. *) - -lemma sem_pre_true : ∀S.∀i:pitem S. - \sem{〈i,true〉} = \sem{i} ∪ {ϵ}. -// qed. - -lemma sem_pre_false : ∀S.∀i:pitem S. - \sem{〈i,false〉} = \sem{i}. -// qed. - -lemma sem_cat: ∀S.∀i1,i2:pitem S. - \sem{i1 · i2} = \sem{i1} · \sem{|i2|} ∪ \sem{i2}. -// qed. - -lemma sem_cat_w: ∀S.∀i1,i2:pitem S.∀w. - \sem{i1 · i2} w = ((\sem{i1} · \sem{|i2|}) w ∨ \sem{i2} w). -// qed. - -lemma sem_plus: ∀S.∀i1,i2:pitem S. - \sem{i1 + i2} = \sem{i1} ∪ \sem{i2}. -// qed. - -lemma sem_plus_w: ∀S.∀i1,i2:pitem S.∀w. - \sem{i1 + i2} w = (\sem{i1} w ∨ \sem{i2} w). -// qed. - -lemma sem_star : ∀S.∀i:pitem S. - \sem{i^*} = \sem{i} · \sem{|i|}^*. -// qed. - -lemma sem_star_w : ∀S.∀i:pitem S.∀w. - \sem{i^*} w = (∃w1,w2.w1 @ w2 = w ∧ \sem{i} w1 ∧ \sem{|i|}^* w2). -// qed. - -(* Below are a few, simple, semantic properties of items. In particular: -- not_epsilon_item : ∀S:DeqSet.∀i:pitem S. ¬ (\sem{i} ϵ). -- epsilon_pre : ∀S.∀e:pre S. (\sem{i} ϵ) ↔ (\snd e = true). -- minus_eps_item: ∀S.∀i:pitem S. \sem{i} =1 \sem{i}-{[ ]}. -- minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} =1 \sem{e}-{[ ]}. -The first property is proved by a simple induction on $i$; the other -results are easy corollaries. We need an auxiliary lemma first. *) - -lemma append_eq_nil : ∀S.∀w1,w2:word S. w1 @ w2 = ϵ → w1 = ϵ. -#S #w1 #w2 cases w1 // #a #tl normalize #H destruct qed. - -lemma not_epsilon_lp : ∀S:DeqSet.∀e:pitem S. ¬ (ϵ ∈ e). -#S #e elim e normalize /2/ - [#r1 #r2 * #n1 #n2 % * /2/ * #w1 * #w2 * * #H - >(append_eq_nil …H…) /2/ - |#r1 #r2 #n1 #n2 % * /2/ - |#r #n % * #w1 * #w2 * * #H >(append_eq_nil …H…) /2/ +(* Concatenating a language with the empty language results in the +empty language. *) +lemma cat_empty_l: ∀S.∀A:word S→Prop. ∅ · A ≐ ∅. +#S #A #w % [|*] * #w1 * #w2 * * #_ * +qed. + +(* Concatenating a language l with the singleton language containing the +empty string, results in the language l; that is {ϵ} is a left and right +unit with respect to concatenation. *) + +lemma epsilon_cat_r: ∀S.∀A:word S →Prop. + A · {ϵ} ≐ A. +#S #A #w % + [* #w1 * #w2 * * #eqw #inw1 normalize #eqw2 btrue %2 // +lemma distr_cat_r_eps: ∀S.∀A,C:word S →Prop. + (A ∪ {ϵ}) · C ≐ A · C ∪ C. + #S #A #C @eqP_trans [|@distr_cat_r |@eqP_union_l @epsilon_cat_l] qed. -lemma minus_eps_item: ∀S.∀i:pitem S. \sem{i} =1 \sem{i}-{[ ]}. -#S #i #w % - [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) // - |* // +(* The following is a major property of derivatives *) + +lemma deriv_middot: ∀S,A,B,a. ¬ A ϵ → deriv S (A·B) a ≐ (deriv S A a) · B. +#S #A #B #a #noteps #w normalize % + [* #w1 cases w1 + [* #w2 * * #_ #Aeps @False_ind /2/ + |#b #w2 * #w3 * * whd in ⊢ ((??%?)→?); #H destruct + #H #H1 @(ex_intro … w2) @(ex_intro … w3) % // % // + ] + |* #w1 * #w2 * * #H #H1 #H2 @(ex_intro … (a::w1)) + @(ex_intro … w2) % // % normalize // ] +qed. + +(* +Main Properties of Kleene's star + +We conclude this section with some important properties of Kleene's +star that will be used in the following chapters. *) + +lemma espilon_in_star: ∀S.∀A:word S → Prop. + A^* ϵ. +#S #A @(ex_intro … [ ]) normalize /2/ qed. -lemma minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} =1 \sem{e}-{[ ]}. -#S * #i * - [>sem_pre_true normalize in ⊢ (??%?); #w % - [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)] - |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ] +lemma cat_to_star:∀S.∀A:word S → Prop. + ∀w1,w2. A w1 → A^* w2 → A^* (w1@w2). +#S #A #w1 #w2 #Aw * #l * #H #H1 @(ex_intro … (w1::l)) +% normalize destruct /2/ +qed. + +lemma fix_star: ∀S.∀A:word S → Prop. + A^* ≐ A · A^* ∪ {ϵ}. +#S #A #w % + [* #l generalize in match w; -w cases l [normalize #w * /2/] + #w1 #tl #w * whd in ⊢ ((??%?)→?); #eqw whd in ⊢ (%→?); * + #w1A #cw1 %1 @(ex_intro … w1) @(ex_intro … (flatten S tl)) + % destruct /2/ whd @(ex_intro … tl) /2/ + |* [2: whd in ⊢ (%→?); #eqw