-include "lang.ma".
+(* We shall apply all the previous machinery to the study of regular languages
+and the constructions of the associated finite automata. *)
-inductive re (S: DeqSet) : Type[0] ≝
- z: re S
- | e: re S
- | s: S → re S
- | c: re S → re S → re S
- | o: re S → re S → re S
- | k: re S → re S.
+include "tutorial/chapter6.ma".
+
+(* The type re of regular expressions over an alphabet $S$ is the smallest
+collection of objects generated by the following constructors: *)
+
+inductive re (S: \ 5a href="cic:/matita/tutorial/chapter4/DeqSet.ind(1,0,0)"\ 6DeqSet\ 5/a\ 6) : Type[0] ≝
+ z: re S (* empty: ∅ *)
+ | e: re S (* epsilon: ϵ *)
+ | s: S → re S (* symbol: a *)
+ | c: re S → re S → re S (* concatenation: e1 · e2 *)
+ | o: re S → re S → re S (* plus: e1 + e2 *)
+ | k: re S → re S. (* kleene's star: e* *)
interpretation "re epsilon" 'epsilon = (e ?).
interpretation "re or" 'plus a b = (o ? a b).
notation "`∅" non associative with precedence 90 for @{ 'empty }.
interpretation "empty" 'empty = (z ?).
-let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝
+(* The language $sem{e}$ associated with the regular expression e is inductively
+defined by the following function: *)
+
+let rec in_l (S : \ 5a href="cic:/matita/tutorial/chapter4/DeqSet.ind(1,0,0)"\ 6DeqSet\ 5/a\ 6) (r : \ 5a href="cic:/matita/tutorial/chapter7/re.ind(1,0,1)"\ 6re\ 5/a\ 6 S) on r : \ 5a href="cic:/matita/tutorial/chapter6/word.def(3)"\ 6word\ 5/a\ 6 S → Prop ≝
match r with
-[ z ⇒ ∅
-| e ⇒ {ϵ}
-| s x ⇒ {[x]}
-| c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2)
-| o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2)
-| k r1 ⇒ (in_l ? r1) ^*].
+[ z ⇒ \ 5a title="empty set" href="cic:/fakeuri.def(1)"\ 6∅\ 5/a\ 6
+| e ⇒ \ 5a title="singleton" href="cic:/fakeuri.def(1)"\ 6{\ 5/a\ 6\ 5a title="epsilon" href="cic:/fakeuri.def(1)"\ 6ϵ\ 5/a\ 6}
+| s x ⇒ \ 5a title="singleton" href="cic:/fakeuri.def(1)"\ 6{\ 5/a\ 6\ 5a title="cons" href="cic:/fakeuri.def(1)"\ 6[\ 5/a\ 6x]}
+| c r1 r2 ⇒ (in_l ? r1) \ 5a title="cat lang" href="cic:/fakeuri.def(1)"\ 6·\ 5/a\ 6 (in_l ? r2)
+| o r1 r2 ⇒ (in_l ? r1) \ 5a title="union" href="cic:/fakeuri.def(1)"\ 6∪\ 5/a\ 6 (in_l ? r2)
+| k r1 ⇒ (in_l ? r1) \ 5a title="star lang" href="cic:/fakeuri.def(1)"\ 6^\ 5/a\ 6*].
notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}.
interpretation "in_l" 'in_l E = (in_l ? E).
interpretation "in_l mem" 'mem w l = (in_l ? l w).
-lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*.
+lemma rsem_star : ∀S.∀r: \ 5a href="cic:/matita/tutorial/chapter7/re.ind(1,0,1)"\ 6re\ 5/a\ 6 S. \ 5a title="in_l" href="cic:/fakeuri.def(1)"\ 6\sem\ 5/a\ 6{r\ 5a title="re star" href="cic:/fakeuri.def(1)"\ 6^\ 5/a\ 6*} \ 5a title="leibnitz's equality" href="cic:/fakeuri.def(1)"\ 6=\ 5/a\ 6 \ 5a title="in_l" href="cic:/fakeuri.def(1)"\ 6\sem\ 5/a\ 6{r}\ 5a title="star lang" href="cic:/fakeuri.def(1)"\ 6^\ 5/a\ 6*.
// qed.
-(* pointed items *)
-inductive pitem (S: DeqSet) : Type[0] ≝
- pz: pitem S
- | pe: pitem S
- | ps: S → pitem S
- | pp: S → pitem S
- | pc: pitem S → pitem S → pitem S
- | po: pitem S → pitem S → pitem S
- | pk: pitem S → pitem S.
+(* We now introduce pointed regular expressions, that are the main tool we shall
+use for the construction of the automaton.
+A pointed regular expression is just a regular expression internally labelled
+with some additional points. Intuitively, points mark the positions inside the
+regular expression which have been reached after reading some prefix of
+the input string, or better the positions where the processing of the remaining
+string has to be started. Each pointed expression for $e$ represents a state of
+the {\em deterministic} automaton associated with $e$; since we obviously have
+only a finite number of possible labellings, the number of states of the automaton
+is finite.
+
+Pointed regular expressions provide the tool for an algebraic revisitation of
+McNaughton and Yamada's algorithm for position automata, making the proof of its
+correctness, that is far from trivial, particularly clear and simple. In particular,
+pointed expressions offer an appealing alternative to Brzozowski's derivatives,
+avoiding their weakest point, namely the fact of being forced to quotient derivatives
+w.r.t. a suitable notion of equivalence in order to get a finite number of states
+(that is not essential for recognizing strings, but is crucial for comparing regular
+expressions).
+
+Our main data structure is the notion of pointed item, that is meant whose purpose
+is to encode a set of positions inside a regular expression.
+The idea of formalizing pointers inside a data type by means of a labelled version
+of the data type itself is probably one of the first, major lessons learned in the
+formalization of the metatheory of programming languages. For our purposes, it is
+enough to mark positions preceding individual characters, so we shall have two kinds
+of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)
+
+inductive pitem (S: \ 5a href="cic:/matita/tutorial/chapter4/DeqSet.ind(1,0,0)"\ 6DeqSet\ 5/a\ 6) : Type[0] ≝
+ pz: pitem S (* empty *)
+ | pe: pitem S (* epsilon *)
+ | ps: S → pitem S (* symbol *)
+ | pp: S → pitem S (* pointed sysmbol *)
+ | pc: pitem S → pitem S → pitem S (* concatenation *)
+ | po: pitem S → pitem S → pitem S (* plus *)
+ | pk: pitem S → pitem S. (* kleene's star *)
-definition pre ≝ λS.pitem S × bool.
+(* A pointed regular expression (pre) is just a pointed item with an additional
+boolean, that must be understood as the possibility to have a trailing point at
+the end of the expression. As we shall see, pointed regular expressions can be
+understood as states of a DFA, and the boolean indicates if
+the state is final or not. *)
+
+definition pre ≝ λS.\ 5a href="cic:/matita/tutorial/chapter7/pitem.ind(1,0,1)"\ 6pitem\ 5/a\ 6 S \ 5a title="Product" href="cic:/fakeuri.def(1)"\ 6×\ 5/a\ 6 \ 5a href="cic:/matita/basics/bool/bool.ind(1,0,0)"\ 6bool\ 5/a\ 6.
interpretation "pitem star" 'star a = (pk ? a).
interpretation "pitem or" 'plus a b = (po ? a b).
interpretation "pitem epsilon" 'epsilon = (pe ?).
interpretation "pitem empty" 'empty = (pz ?).
+(* The carrier $|i|$ of an item i is the regular expression obtained from i by
+removing all the points. Similarly, the carrier of a pointed regular expression
+is the carrier of its item. *)
+
let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
match l with
[ pz ⇒ `∅
lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*.
// qed.
-(* boolean equality *)
+(* Items and pres are a very concrete datatype: they can be effectively compared,
+and enumerated. In particular, we can define a boolean equality beqitem and
+\vebeqitem_true+ enriching the set \verb+(pitem S)+
+to a \verb+DeqSet+. boolean equality *)
let rec beqitem S (i1,i2: pitem S) on i1 ≝
match i1 with
[ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false]
@eqP_trans [||@eqP_sym @epsilon_cat_l] @eqP_sym @star_fix_eps
|>sem_pre_false >sem_pre_false >sem_star /2/
]
-qed.
-
+qed.
\ No newline at end of file