X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=matita%2Fmatita%2Flib%2Ftutorial%2Fchapter7.ma;fp=matita%2Fmatita%2Flib%2Ftutorial%2Fchapter7.ma;h=4b12b75fcf1c682779bebe3c8ecb2d7e560a5f6c;hb=770ba48ba232d7f1782629c572820a0f1bfe4fde;hp=0000000000000000000000000000000000000000;hpb=66d22e9bc8ecc624e93e3e142676045d511ed9b0;p=helm.git

diff --git a/matita/matita/lib/tutorial/chapter7.ma b/matita/matita/lib/tutorial/chapter7.ma
new file mode 100644
index 000000000..4b12b75fc
--- /dev/null
+++ b/matita/matita/lib/tutorial/chapter7.ma
@@ -0,0 +1,308 @@
+(* 
+Regular Expressions
+
+We shall apply all the previous machinery to the study of regular languages 
+and the constructions of the associated finite automata. *)
+
+include "tutorial/chapter6.ma".
+
+(* The type re of regular expressions over an alphabet $S$ is the smallest 
+collection of objects generated by the following constructors: *)
+
+inductive re (S: DeqSet) : Type[0] ≝
+   z: re S                (* empty: ∅ *)
+ | e: re S                (* epsilon: ϵ *)
+ | s: S → re S            (* symbol: a *)
+ | c: re S → re S → re S  (* concatenation: e1 · e2 *)
+ | o: re S → re S → re S  (* plus: e1 + e2 *)
+ | k: re S → re S.        (* Kleene's star: e* *)
+
+interpretation "re epsilon" 'epsilon = (e ?). (* 'epsilon stands for the constructor e *)
+interpretation "re or" 'plus a b = (o ? a b). (* 'plus stands for the constructor o *)
+interpretation "re cat" 'middot a b = (c ? a b). (* 'middot stands for the constructor c *)
+interpretation "re star" 'star a = (k ? a). (* 'star stands for the constructor k *)
+
+notation < "a" non associative with precedence 90 for @{ 'ps $a}. (* '<' form of 'ps: the bare symbol *)
+notation > "` term 90 a" non associative with precedence 90 for @{ 'ps $a}. (* '>' form of 'ps: backquote followed by the symbol *)
+interpretation "atom" 'ps a = (s ? a). (* 'ps stands for the constructor s *)
+
+notation "`∅" non associative with precedence 90 for @{ 'empty }. (* backquoted ∅ denotes 'empty *)
+interpretation "empty" 'empty = (z ?). (* 'empty stands for the constructor z *)
+
+(* The language sem{e} associated with the regular expression e is inductively 
+defined by the following function: *)
+
+let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝ (* language of r, by structural recursion on r *)
+match r with
+[ z ⇒ ∅
+| e ⇒ {ϵ}
+| s x ⇒ { (x::[]) }                       (* the singleton containing the one-letter word x *)
+| c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2)
+| o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2)
+| k r1 ⇒ (in_l ? r1) ^*].
+
+notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}. (* \sem{E} is the surface syntax for 'in_l *)
+interpretation "in_l" 'in_l E = (in_l ? E). (* on a re, \sem{E} means in_l ? E *)
+interpretation "in_l mem" 'mem w l = (in_l ? l w). (* 'mem w l on a re l means in_l ? l w *)
+
+lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*. (* restates the k branch of in_l *)
+// qed.
+
+
+(* 
+Pointed Regular expressions
+
+We now introduce pointed regular expressions, that are the main tool we shall 
+use for the construction of the automaton. 
+A pointed regular expression is just a regular expression internally labelled 
+with some additional points. Intuitively, points mark the positions inside the 
+regular expression which have been reached after reading some prefix of
+the input string, or better the positions where the processing of the remaining 
+string has to be started. Each pointed expression for $e$ represents a state of 
+the {\em deterministic} automaton associated with $e$; since we obviously have 
+only a finite number of possible labellings, the number of states of the automaton 
+is finite.
+
+Pointed regular expressions provide the tool for an algebraic revisitation of 
+McNaughton and Yamada's algorithm for position automata, making the proof of its 
+correctness, that is far from trivial, particularly clear and simple. In particular, 
+pointed expressions offer an appealing alternative to Brzozowski's derivatives, 
+avoiding their weakest point, namely the fact of being forced to quotient derivatives 
+w.r.t. a suitable notion of equivalence in order to get a finite number of states 
+(that is not essential for recognizing strings, but is crucial for comparing regular 
+expressions). 
+
+Our main data structure is the notion of pointed item, whose purpose
+is to encode a set of positions inside a regular expression. 
+The idea of formalizing pointers inside a data type by means of a labelled version 
+of the data type itself is probably one of the first, major lessons learned in the 
+formalization of the metatheory of programming languages. For our purposes, it is 
+enough to mark positions preceding individual characters, so we shall have two kinds 
+of characters •a (pp a) and a (ps a) according to whether a is pointed or not. *)
+
+inductive pitem (S: DeqSet) : Type[0] ≝
+   pz: pitem S                       (* empty *)
+ | pe: pitem S                       (* epsilon *)
+ | ps: S → pitem S                   (* symbol *)
+ | pp: S → pitem S                   (* pointed symbol *)
+ | pc: pitem S → pitem S → pitem S   (* concatenation *)
+ | po: pitem S → pitem S → pitem S   (* plus *)
+ | pk: pitem S → pitem S.            (* Kleene's star *)
+ 
+(* A pointed regular expression (pre) is just a pointed item with an additional 
+boolean, that must be understood as the possibility to have a trailing point at 
+the end of the expression. As we shall see, pointed regular expressions can be 
+understood as states of a DFA, and the boolean indicates if
+the state is final or not. *)
+
+definition pre ≝ λS.pitem S × bool. (* a pointed item paired with a boolean *)
+
+interpretation "pitem star" 'star a = (pk ? a). (* 'star on items stands for pk *)
+interpretation "pitem or" 'plus a b = (po ? a b). (* 'plus on items stands for po *)
+interpretation "pitem cat" 'middot a b = (pc ? a b). (* 'middot on items stands for pc *)
+notation < ".a" non associative with precedence 90 for @{ 'pp $a}. (* '<' form of 'pp: a dot followed by the symbol *)
+notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}. (* '>' form of 'pp: backquote, dot, then the symbol *)
+interpretation "pitem pp" 'pp a = (pp ? a). (* 'pp stands for the constructor pp *)
+interpretation "pitem ps" 'ps a = (ps ? a). (* 'ps on items stands for ps *)
+interpretation "pitem epsilon" 'epsilon = (pe ?). (* 'epsilon on items stands for pe *)
+interpretation "pitem empty" 'empty = (pz ?). (* 'empty on items stands for pz *)
+
+(* The carrier $|i|$ of an item i is the regular expression obtained from i by 
+removing all the points. Similarly, the carrier of a pointed regular expression 
+is the carrier of its item. *)
+
+let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝ (* maps an item to a plain re, by structural recursion on l *)
+ match l with
+  [ pz ⇒ z ? (* `∅ *)
+  | pe ⇒ ϵ
+  | ps x ⇒ `x
+  | pp x ⇒ `x (* same result as for ps x: the point is dropped *)
+  | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
+  | po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
+  | pk E ⇒ (forget ? E)^* ].
+ 
+(* notation < "|term 19 e|" non associative with precedence 70 for @{'forget $e}.*)
+interpretation "forget" 'norm a = (forget ? a). (* 'norm a stands for forget ? a; presumably the |a| syntax used by the lemmas below - confirm where 'norm is declared *)
+
+lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|). (* restates the pc branch of forget *)
+// qed.
+
+lemma erase_plus : ∀S.∀i1,i2:pitem S. (* restates the po branch of forget *)
+  |i1 + i2| = |i1| + |i2|.
+// qed.
+
+lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*. (* restates the pk branch of forget *)
+// qed.
+
+(* 
+Comparing items and pres
+
+Items and pres are very concrete datatypes: they can be effectively compared, 
+and enumerated. In particular, we can define a boolean equality beqitem and a proof
+beqitem_true that it reflects propositional equality, enriching the set (pitem S)
+to a DeqSet. *)
+
+let rec beqitem S (i1,i2: pitem S) on i1 ≝ (* boolean equality on items, by structural recursion on i1 *)
+  match i1 with
+  [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false]
+  | pe ⇒ match i2 with [ pe ⇒ true | _ ⇒ false]
+  | ps y1 ⇒ match i2 with [ ps y2 ⇒ y1==y2 | _ ⇒ false] (* symbols are compared with == *)
+  | pp y1 ⇒ match i2 with [ pp y2 ⇒ y1==y2 | _ ⇒ false]
+  | po i11 i12 ⇒ match i2 with 
+    [ po i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
+    | _ ⇒ false] (* different head constructors are never equal *)
+  | pc i11 i12 ⇒ match i2 with 
+    [ pc i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
+    | _ ⇒ false]
+  | pk i11 ⇒ match i2 with [ pk i21 ⇒ beqitem S i11 i21 | _ ⇒ false]
+  ].
+
+lemma beqitem_true: ∀S,i1,i2. iff (beqitem S i1 i2 = true) (i1 = i2). 
+#S #i1 elim i1 (* induction on i1; each branch then case-splits on i2 *)
+  [#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
+  |#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
+  |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
+    [>(\P H) // | @(\b (refl …))]
+  |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
+    [>(\P H) // | @(\b (refl …))]
+  |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
+   normalize #H destruct 
+    [cases (true_or_false (beqitem S i11 i21)) #H1 (* split on whether the first components compare equal *)
+      [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
+      |>H1 in H; normalize #abs @False_ind /2/
+      ]
+    |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
+    ]
+  |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
+   normalize #H destruct 
+    [cases (true_or_false (beqitem S i11 i21)) #H1
+      [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
+      |>H1 in H; normalize #abs @False_ind /2/
+      ]
+    |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
+    ]
+  |#i3 #Hind #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i4] %
+   normalize #H destruct 
+    [>(proj1 … (Hind i4) H) // |>(proj2 … (Hind i4) (refl …)) //]
+  ]
+qed. 
+
+definition DeqItem ≝ λS. (* packs pitem S, beqitem S and beqitem_true S with mk_DeqSet *)
+  mk_DeqSet (pitem S) (beqitem S) (beqitem_true S).
+
+(* We also add a couple of unification hints to allow the type inference system 
+to look at (pitem S) as the carrier of a DeqSet, and at beqitem as if it was the 
+equality function of a DeqSet. *)
+
+unification hint  0 ≔ S; 
+    X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
+(* ---------------------------------------- *) ⊢ 
+    pitem S ≡ carr X. (* relates pitem S to the carrier of X *)
+    
+unification hint  0 ≔ S,i1,i2; 
+    X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
+(* ---------------------------------------- *) ⊢ 
+    beqitem S i1 i2 ≡ eqb X i1 i2. (* relates beqitem to the eqb of X *)
+
+(* 
+Semantics of pointed regular expressions
+
+The intuitive semantics of a point is to mark the position where
+we should start reading the regular expression. The language associated
+to a pre is the union of the languages associated with its points. *)
+
+let rec in_pl (S : DeqSet) (r : pitem S) on r : word S → Prop ≝ (* language of an item, by structural recursion on r *)
+match r with
+[ pz ⇒ ∅
+| pe ⇒ ∅
+| ps _ ⇒ ∅                                (* an unpointed symbol contributes no word *)
+| pp x ⇒ { (x::[]) }                      (* a pointed symbol contributes the one-letter word x *)
+| pc r1 r2 ⇒ (in_pl ? r1) · \sem{forget ? r2} ∪ (in_pl ? r2)
+| po r1 r2 ⇒ (in_pl ? r1) ∪ (in_pl ? r2)
+| pk r1 ⇒ (in_pl ? r1) · \sem{forget ? r1}^*  ].
+
+interpretation "in_pl" 'in_l E = (in_pl ? E). (* on an item, 'in_l E means in_pl ? E *)
+interpretation "in_pl mem" 'mem w l = (in_pl ? l w). (* 'mem w l on an item l means in_pl ? l w *)
+
+definition in_prl ≝ λS : DeqSet.λp:pre S. (* language of a pre: the item's language, plus {ϵ} when the boolean is true *)
+  if (\snd p) then \sem{\fst p} ∪ {ϵ} else \sem{\fst p}.
+  
+interpretation "in_prl mem" 'mem w l = (in_prl ? l w). (* 'mem w l on a pre l means in_prl ? l w *)
+interpretation "in_prl" 'in_l E = (in_prl ? E). (* on a pre, 'in_l E means in_prl ? E *)
+
+(* The following, trivial lemmas are only meant for rewriting purposes. *)
+
+lemma sem_pre_true : ∀S.∀i:pitem S. (* the "then" branch of in_prl *)
+  \sem{〈i,true〉} = \sem{i} ∪ {ϵ}. 
+// qed.
+
+lemma sem_pre_false : ∀S.∀i:pitem S. (* the "else" branch of in_prl *)
+  \sem{〈i,false〉} = \sem{i}. 
+// qed.
+
+lemma sem_cat: ∀S.∀i1,i2:pitem S. (* restates the pc branch of in_pl *)
+  \sem{i1 · i2} = \sem{i1} · \sem{|i2|} ∪ \sem{i2}.
+// qed.
+
+lemma sem_cat_w: ∀S.∀i1,i2:pitem S.∀w. (* sem_cat applied to a word w *)
+  \sem{i1 · i2} w = ((\sem{i1} · \sem{|i2|}) w ∨ \sem{i2} w).
+// qed.
+
+lemma sem_plus: ∀S.∀i1,i2:pitem S. (* restates the po branch of in_pl *)
+  \sem{i1 + i2} = \sem{i1} ∪ \sem{i2}.
+// qed.
+
+lemma sem_plus_w: ∀S.∀i1,i2:pitem S.∀w. (* sem_plus applied to a word w *)
+  \sem{i1 + i2} w = (\sem{i1} w ∨ \sem{i2} w).
+// qed.
+
+lemma sem_star : ∀S.∀i:pitem S. (* restates the pk branch of in_pl *)
+  \sem{i^*} = \sem{i} · \sem{|i|}^*.
+// qed.
+
+lemma sem_star_w : ∀S.∀i:pitem S.∀w. (* sem_star applied to a word w, with the concatenation spelled out *)
+  \sem{i^*} w = (∃w1,w2.w1 @ w2 = w ∧ \sem{i} w1 ∧ \sem{|i|}^* w2).
+// qed.
+
+(* Below are a few, simple, semantic properties of items. In particular:
+- not_epsilon_lp : ∀S:DeqSet.∀e:pitem S. ¬ (ϵ ∈ e).
+- epsilon_to_true, true_to_epsilon : ∀S.∀e:pre S. (ϵ ∈ e) ↔ (\snd e = true).
+- minus_eps_item: ∀S.∀i:pitem S. \sem{i} =1 \sem{i}-{[ ]}.
+- minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} =1 \sem{e}-{[ ]}.
+The first property is proved by a simple induction on the item; the other
+results are easy corollaries. We need an auxiliary lemma first. *)
+
+lemma append_eq_nil : ∀S.∀w1,w2:word S. w1 @ w2 = ϵ → w1 = ϵ. (* proved by case analysis on w1 *)
+#S #w1 #w2 cases w1 // #a #tl normalize #H destruct qed.
+
+lemma not_epsilon_lp : ∀S:DeqSet.∀e:pitem S. ¬ (ϵ ∈ e).
+#S #e elim e normalize /2/  (* induction on e; /2/ closes some goals, three remain *)
+  [#r1 #r2 * #n1 #n2 % * /2/ * #w1 * #w2 * * #H 
+   >(append_eq_nil …H…) /2/
+  |#r1 #r2 #n1 #n2 % * /2/
+  |#r #n % * #w1 * #w2 * * #H >(append_eq_nil …H…) /2/
+  ]
+qed.
+
+lemma epsilon_to_true : ∀S.∀e:pre S. ϵ ∈ e → \snd e = true.
+#S * #i #b cases b // normalize #H @False_ind /2/ (* split the pair, then case on the boolean b *)
+qed.
+
+lemma true_to_epsilon : ∀S.∀e:pre S. \snd e = true → ϵ ∈ e.
+#S * #i #b #btrue normalize in btrue; >btrue %2 // (* rewrite b to true, then pick the second disjunct *)
+qed.
+
+lemma minus_eps_item: ∀S.∀i:pitem S. \sem{i} =1 \sem{i}-{[ ]}.
+#S #i #w % 
+  [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) // (* uses not_epsilon_lp on i *)
+  |* //
+  ]
+qed.
+
+lemma minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} =1 \sem{e}-{[ ]}.
+#S * #i * (* split the pair, then case on its boolean *)
+  [>sem_pre_true normalize in ⊢ (??%?); #w % 
+    [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)]
+  |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ]
+  ]
+qed.
\ No newline at end of file