(**************************************************************************)
include "re/lang.ma".
+include "basics/core_notation/card_1.ma".
+
+(* The type re of regular expressions over an alphabet $S$ is the smallest
+collection of objects generated by the following constructors: *)
inductive re (S: DeqSet) : Type[0] ≝
z: re S
notation "`∅" non associative with precedence 90 for @{ 'empty }.
interpretation "empty" 'empty = (z ?).
+(* The language sem{e} associated with the regular expression e is inductively
+defined by the following function: *)
+
let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝
match r with
[ z ⇒ ∅
lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*.
// qed.
+(*
+Pointed Regular expressions
+
+We now introduce pointed regular expressions, that are the main tool we shall
+use for the construction of the automaton.
+A pointed regular expression is just a regular expression internally labelled
+with some additional points. Intuitively, points mark the positions inside the
+regular expression which have been reached after reading some prefix of
+the input string, or better the positions where the processing of the remaining
+string has to be started. Each pointed expression for $e$ represents a state of
+the {\em deterministic} automaton associated with $e$; since we obviously have
+only a finite number of possible labellings, the number of states of the automaton
+is finite.
+
+Pointed regular expressions provide the tool for an algebraic revisitation of
+McNaughton and Yamada's algorithm for position automata, making the proof of its
+correctness, that is far from trivial, particularly clear and simple. In particular,
+pointed expressions offer an appealing alternative to Brzozowski's derivatives,
+avoiding their weakest point, namely the fact of being forced to quotient derivatives
+w.r.t. a suitable notion of equivalence in order to get a finite number of states
+(that is not essential for recognizing strings, but is crucial for comparing regular
+expressions).
+
+Our main data structure is the notion of pointed item, that is meant whose purpose
+is to encode a set of positions inside a regular expression.
+The idea of formalizing pointers inside a data type by means of a labelled version
+of the data type itself is probably one of the first, major lessons learned in the
+formalization of the metatheory of programming languages. For our purposes, it is
+enough to mark positions preceding individual characters, so we shall have two kinds
+of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)
-(* pointed items *)
inductive pitem (S: DeqSet) : Type[0] ≝
pz: pitem S
| pe: pitem S
| po: pitem S → pitem S → pitem S
| pk: pitem S → pitem S.
+(* A pointed regular expression (pre) is just a pointed item with an additional
+boolean, that must be understood as the possibility to have a trailing point at
+the end of the expression. As we shall see, pointed regular expressions can be
+understood as states of a DFA, and the boolean indicates if
+the state is final or not. *)
+
definition pre ≝ λS.pitem S × bool.
interpretation "pitem star" 'star a = (pk ? a).
interpretation "pitem epsilon" 'epsilon = (pe ?).
interpretation "pitem empty" 'empty = (pz ?).
+(* The carrier $|i|$ of an item i is the regular expression obtained from i by
+removing all the points. Similarly, the carrier of a pointed regular expression
+is the carrier of its item. *)
+
let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
match l with
[ pz ⇒ `∅
| pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
| po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
| pk E ⇒ (forget ? E)^* ].
-
-(* notation < "|term 19 e|" non associative with precedence 70 for @{'forget $e}.*)
-interpretation "forget" 'norm a = (forget ? a).
+
+interpretation "forget" 'card a = (forget ? a).
+
+lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|).
+// qed.
+
+lemma erase_plus : ∀S.∀i1,i2:pitem S.
+ |i1 + i2| = |i1| + |i2|.
+// qed.
+
+lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*.
+// qed.
+
+(*
+Comparing items and pres
+
+Items and pres are very concrete datatypes: they can be effectively compared,
+and enumerated. In particular, we can define a boolean equality beqitem and a proof
+beqitem_true that it refects propositional equality, enriching the set (pitem S)
+to a DeqSet. *)
+
+let rec beqitem S (i1,i2: pitem S) on i1 ≝
+ match i1 with
+ [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false]
+ | pe ⇒ match i2 with [ pe ⇒ true | _ ⇒ false]
+ | ps y1 ⇒ match i2 with [ ps y2 ⇒ y1==y2 | _ ⇒ false]
+ | pp y1 ⇒ match i2 with [ pp y2 ⇒ y1==y2 | _ ⇒ false]
+ | po i11 i12 ⇒ match i2 with
+ [ po i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
+ | _ ⇒ false]
+ | pc i11 i12 ⇒ match i2 with
+ [ pc i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
+ | _ ⇒ false]
+ | pk i11 ⇒ match i2 with [ pk i21 ⇒ beqitem S i11 i21 | _ ⇒ false]
+ ].
+
+lemma beqitem_true: ∀S,i1,i2. iff (beqitem S i1 i2 = true) (i1 = i2).
+#S #i1 elim i1
+ [#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
+ |#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
+ |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
+ [>(\P H) // | @(\b (refl …))]
+ |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
+ [>(\P H) // | @(\b (refl …))]
+ |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
+ normalize #H destruct
+ [cases (true_or_false (beqitem S i11 i21)) #H1
+ [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
+ |>H1 in H; normalize #abs @False_ind /2/
+ ]
+ |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
+ ]
+ |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
+ normalize #H destruct
+ [cases (true_or_false (beqitem S i11 i21)) #H1
+ [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
+ |>H1 in H; normalize #abs @False_ind /2/
+ ]
+ |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
+ ]
+ |#i3 #Hind #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i4] %
+ normalize #H destruct
+ [>(proj1 … (Hind i4) H) // |>(proj2 … (Hind i4) (refl …)) //]
+ ]
+qed.
+
+definition DeqItem ≝ λS.
+ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S).
+
+(* We also add a couple of unification hints to allow the type inference system
+to look at (pitem S) as the carrier of a DeqSet, and at beqitem as if it was the
+equality function of a DeqSet. *)
+
+unification hint 0 ≔ S;
+ X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
+(* ---------------------------------------- *) ⊢
+ pitem S ≡ carr X.
+
+unification hint 0 ≔ S,i1,i2;
+ X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
+(* ---------------------------------------- *) ⊢
+ beqitem S i1 i2 ≡ eqb X i1 i2.
+
+(*
+Semantics of pointed regular expressions
+
+The intuitive semantic of a point is to mark the position where
+we should start reading the regular expression. The language associated
+to a pre is the union of the languages associated with its points. *)
let rec in_pl (S : DeqSet) (r : pitem S) on r : word S → Prop ≝
match r with
interpretation "in_prl mem" 'mem w l = (in_prl ? l w).
interpretation "in_prl" 'in_l E = (in_prl ? E).
+(* The following, trivial lemmas are only meant for rewriting purposes. *)
+
lemma sem_pre_true : ∀S.∀i:pitem S.
\sem{〈i,true〉} = \sem{i} ∪ {ϵ}.
// qed.
\sem{i^*} w = (∃w1,w2.w1 @ w2 = w ∧ \sem{i} w1 ∧ \sem{|i|}^* w2).
// qed.
+(* Below are a few, simple, semantic properties of items. In particular:
+- not_epsilon_item : ∀S:DeqSet.∀i:pitem S. ¬ (\sem{i} ϵ).
+- epsilon_pre : ∀S.∀e:pre S. (\sem{i} ϵ) ↔ (\snd e = true).
+- minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
+- minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
+The first property is proved by a simple induction on $i$; the other
+results are easy corollaries. We need an auxiliary lemma first. *)
+
lemma append_eq_nil : ∀S.∀w1,w2:word S. w1 @ w2 = ϵ → w1 = ϵ.
#S #w1 #w2 cases w1 // #a #tl normalize #H destruct qed.
]
qed.
-(* lemma 12 *)
lemma epsilon_to_true : ∀S.∀e:pre S. ϵ ∈ e → \snd e = true.
#S * #i #b cases b // normalize #H @False_ind /2/
qed.
#S * #i #b #btrue normalize in btrue; >btrue %2 //
qed.
+lemma minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
+#S #i #w %
+ [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) //
+ |* //
+ ]
+qed.
+
+lemma minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
+#S * #i *
+ [>sem_pre_true normalize in ⊢ (??%?); #w %
+ [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)]
+ |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ]
+ ]
+qed.
+
+(*
+Broadcasting points
+
+Intuitively, a regular expression e must be understood as a pointed expression with a single
+point in front of it. Since however we only allow points before symbols, we must broadcast
+this initial point inside e traversing all nullable subexpressions, that essentially corresponds
+to the ϵ-closure operation on automata. We use the notation •(_) to denote such an operation;
+its definition is the expected one: let us start discussing an example.
+
+Example
+Let us broadcast a point inside (a + ϵ)(b*a + b)b. We start working in parallel on the
+first occurrence of a (where the point stops), and on ϵ that gets traversed. We have hence
+reached the end of a + ϵ and we must pursue broadcasting inside (b*a + b)b. Again, we work in
+parallel on the two additive subterms b^*a and b; the first point is allowed to both enter the
+star, and to traverse it, stopping in front of a; the second point just stops in front of b.
+No point reached that end of b^*a + b hence no further propagation is possible. In conclusion:
+ •((a + ϵ)(b^*a + b)b) = 〈(•a + ϵ)((•b)^*•a + •b)b, false〉
+*)
+
+(* Broadcasting a point inside an item generates a pre, since the point could possibly reach
+the end of the expression.
+Broadcasting inside a i1+i2 amounts to broadcast in parallel inside i1 and i2.
+If we define
+ 〈i1,b1〉 ⊕ 〈i2,b2〉 = 〈i1 + i2, b1 ∨ b2〉
+then, we just have •(i1+i2) = •(i1)⊕ •(i2).
+*)
+
definition lo ≝ λS:DeqSet.λa,b:pre S.〈\fst a + \fst b,\snd a ∨ \snd b〉.
-notation "a ⊕ b" left associative with precedence 60 for @{'oplus $a $b}.
+notation "a ⊕ b" left associative with precedence 65 for @{'oplus $a $b}.
interpretation "oplus" 'oplus a b = (lo ? a b).
lemma lo_def: ∀S.∀i1,i2:pitem S.∀b1,b2. 〈i1,b1〉⊕〈i2,b2〉=〈i1+i2,b1∨b2〉.
// qed.
+(*
+Concatenation is a bit more complex. In order to broadcast a point inside i1 · i2
+we should start broadcasting it inside i1 and then proceed into i2 if and only if a
+point reached the end of i1. This suggests to define •(i1 · i2) as •(i1) ▹ i2, where
+e ▹ i is a general operation of concatenation between a pre and an item, defined by
+cases on the boolean in e:
+
+ 〈i1,true〉 ▹ i2 = i1 ◃ •(i_2)
+ 〈i1,false〉 ▹ i2 = i1 · i2
+
+In turn, ◃ says how to concatenate an item with a pre, that is however extremely simple:
+
+ i1 ◃ 〈i1,b〉 = 〈i_1 · i2, b〉
+
+Let us come to the formalized definitions:
+*)
+
definition pre_concat_r ≝ λS:DeqSet.λi:pitem S.λe:pre S.
match e with [ mk_Prod i1 b ⇒ 〈i · i1, b〉].
-notation "i ◂ e" left associative with precedence 60 for @{'ltrif $i $e}.
-interpretation "pre_concat_r" 'ltrif i e = (pre_concat_r ? i e).
+notation "i ◃ e" left associative with precedence 65 for @{'lhd $i $e}.
+interpretation "pre_concat_r" 'lhd i e = (pre_concat_r ? i e).
+
+(* The behaviour of ◃ is summarized by the following, easy lemma: *)
lemma eq_to_ex_eq: ∀S.∀A,B:word S → Prop.
- A = B → A =1 B.
+ A = B → A ≐ B.
#S #A #B #H >H /2/ qed.
lemma sem_pre_concat_r : ∀S,i.∀e:pre S.
- \sem{i â\97\82 e} =1 \sem{i} · \sem{|\fst e|} ∪ \sem{e}.
+ \sem{i â\97\83 e} â\89\90 \sem{i} · \sem{|\fst e|} ∪ \sem{e}.
#S #i * #i1 #b1 cases b1 [2: @eq_to_ex_eq //]
>sem_pre_true >sem_cat >sem_pre_true /2/
qed.
-
-definition lc ≝ λS:DeqSet.λbcast:∀S:DeqSet.pitem S → pre S.λe1:pre S.λi2:pitem S.
+
+(* The definition of $•(-)$ (eclose) and ▹ (pre_concat_l) are mutually recursive.
+In this situation, a viable alternative that is usually simpler to reason about,
+is to abstract one of the two functions with respect to the other. In particular
+we abstract pre_concat_l with respect to an input bcast function from items to
+pres. *)
+
+definition pre_concat_l ≝ λS:DeqSet.λbcast:∀S:DeqSet.pitem S → pre S.λe1:pre S.λi2:pitem S.
match e1 with
[ mk_Prod i1 b1 ⇒ match b1 with
- [ true â\87\92 (i1 â\97\82 (bcast ? i2))
+ [ true â\87\92 (i1 â\97\83 (bcast ? i2))
| false ⇒ 〈i1 · i2,false〉
]
].
-
-definition lift ≝ λS.λf:pitem S →pre S.λe:pre S.
- match e with
- [ mk_Prod i b ⇒ 〈\fst (f i), \snd (f i) ∨ b〉].
-notation "a â\96¸ b" left associative with precedence 60 for @{'lc eclose $a $b}.
-interpretation "lc" 'lc op a b = (lc ? op a b).
+notation "a â\96¹ b" left associative with precedence 65 for @{'tril eclose $a $b}.
+interpretation "item-pre concat" 'tril op a b = (pre_concat_l ? op a b).
-definition lk ≝ λS:DeqSet.λbcast:∀S:DeqSet.∀E:pitem S.pre S.λe:pre S.
- match e with
- [ mk_Prod i1 b1 ⇒
- match b1 with
- [true ⇒ 〈(\fst (bcast ? i1))^*, true〉
- |false ⇒ 〈i1^*,false〉
- ]
- ].
-
-(* notation < "a \sup ⊛" non associative with precedence 90 for @{'lk $op $a}.*)
-interpretation "lk" 'lk op a = (lk ? op a).
-notation "a^⊛" non associative with precedence 90 for @{'lk eclose $a}.
+notation "•" non associative with precedence 65 for @{eclose ?}.
-notation "•" non associative with precedence 60 for @{eclose ?}.
+(* We are ready to give the formal definition of the broadcasting operation. *)
let rec eclose (S: DeqSet) (i: pitem S) on i : pre S ≝
match i with
| ps x ⇒ 〈 `.x, false〉
| pp x ⇒ 〈 `.x, false 〉
| po i1 i2 ⇒ •i1 ⊕ •i2
- | pc i1 i2 â\87\92 â\80¢i1 â\96¸ i2
+ | pc i1 i2 â\87\92 â\80¢i1 â\96¹ i2
| pk i ⇒ 〈(\fst (•i))^*,true〉].
-notation "• x" non associative with precedence 60 for @{'eclose $x}.
+notation "• x" non associative with precedence 65 for @{'eclose $x}.
interpretation "eclose" 'eclose x = (eclose ? x).
+(* Here are a few simple properties of ▹ and •(-) *)
+
lemma eclose_plus: ∀S:DeqSet.∀i1,i2:pitem S.
•(i1 + i2) = •i1 ⊕ •i2.
// qed.
lemma eclose_dot: ∀S:DeqSet.∀i1,i2:pitem S.
- â\80¢(i1 · i2) = â\80¢i1 â\96¸ i2.
+ â\80¢(i1 · i2) = â\80¢i1 â\96¹ i2.
// qed.
lemma eclose_star: ∀S:DeqSet.∀i:pitem S.
•i^* = 〈(\fst(•i))^*,true〉.
// qed.
-definition reclose ≝ λS. lift S (eclose S).
-interpretation "reclose" 'eclose x = (reclose ? x).
-
-(* theorem 16: 2 *)
-lemma sem_oplus: ∀S:DeqSet.∀e1,e2:pre S.
- \sem{e1 ⊕ e2} =1 \sem{e1} ∪ \sem{e2}.
-#S * #i1 #b1 * #i2 #b2 #w %
- [cases b1 cases b2 normalize /2/ * /3/ * /3/
- |cases b1 cases b2 normalize /2/ * /3/ * /3/
- ]
-qed.
-
lemma odot_true :
∀S.∀i1,i2:pitem S.
- â\8c©i1,trueâ\8cª â\96¸ i2 = i1 â\97\82 (•i2).
+ â\8c©i1,trueâ\8cª â\96¹ i2 = i1 â\97\83 (•i2).
// qed.
lemma odot_true_bis :
∀S.∀i1,i2:pitem S.
- â\8c©i1,trueâ\8cª â\96¸ i2 = 〈i1 · \fst (•i2), \snd (•i2)〉.
+ â\8c©i1,trueâ\8cª â\96¹ i2 = 〈i1 · \fst (•i2), \snd (•i2)〉.
#S #i1 #i2 normalize cases (•i2) // qed.
lemma odot_false:
∀S.∀i1,i2:pitem S.
- 〈i1,false〉 ▸ i2 = 〈i1 · i2, false〉.
-// qed.
-
-lemma LcatE : ∀S.∀e1,e2:pitem S.
- \sem{e1 · e2} = \sem{e1} · \sem{|e2|} ∪ \sem{e2}.
+ 〈i1,false〉 ▹ i2 = 〈i1 · i2, false〉.
// qed.
-lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|).
-// qed.
+(* The definition of •(-) (eclose) can then be lifted from items to pres
+in the obvious way. *)
-lemma erase_plus : ∀S.∀i1,i2:pitem S.
- |i1 + i2| = |i1| + |i2|.
-// qed.
+definition lift ≝ λS.λf:pitem S →pre S.λe:pre S.
+ match e with
+ [ mk_Prod i b ⇒ 〈\fst (f i), \snd (f i) ∨ b〉].
+
+definition preclose ≝ λS. lift S (eclose S).
+interpretation "preclose" 'eclose x = (preclose ? x).
-lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*.
-// qed.
+(* Obviously, broadcasting does not change the carrier of the item,
+as it is easily proved by structural induction. *)
lemma erase_bull : ∀S.∀i:pitem S. |\fst (•i)| = |i|.
#S #i elim i //
| #i #IH >eclose_star >(erase_star … i) <IH cases (•i) //
]
qed.
-
-lemma sem_eclose_star: ∀S:DeqSet.∀i:pitem S.
- \sem{〈i^*,true〉} =1 \sem{〈i,false〉}·\sem{|i|}^* ∪ {ϵ}.
-/2/ qed.
-(* theorem 16: 1 → 3 *)
+(* We are now ready to state the main semantic properties of ⊕, ◃ and •(-):
+
+sem_oplus: \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}
+sem_pcl: \sem{e1 ▹ i2} ≐ \sem{e1} · \sem{|i2|} ∪ \sem{i2}
+sem_bullet \sem{•i} ≐ \sem{i} ∪ \sem{|i|}
+
+The proof of sem_oplus is straightforward. *)
+
+lemma sem_oplus: ∀S:DeqSet.∀e1,e2:pre S.
+ \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}.
+#S * #i1 #b1 * #i2 #b2 #w %
+ [cases b1 cases b2 normalize /2/ * /3/ * /3/
+ |cases b1 cases b2 normalize /2/ * /3/ * /3/
+ ]
+qed.
+
+(* For the others, we proceed as follow: we first prove the following
+auxiliary lemma, that assumes sem_bullet:
+
+sem_pcl_aux:
+ \sem{•i2} ≐ \sem{i2} ∪ \sem{|i2|} →
+ \sem{e1 ▹ i2} ≐ \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
+
+Then, using the previous result, we prove sem_bullet by induction
+on i. Finally, sem_pcl_aux and sem_bullet give sem_pcl. *)
+
+lemma LcatE : ∀S.∀e1,e2:pitem S.
+ \sem{e1 · e2} = \sem{e1} · \sem{|e2|} ∪ \sem{e2}.
+// qed.
+
lemma odot_dot_aux : ∀S.∀e1:pre S.∀i2:pitem S.
- \sem{•i2} =1 \sem{i2} ∪ \sem{|i2|} →
- \sem{e1 â\96¸ i2} =1 \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
+ \sem{•i2} ≐ \sem{i2} ∪ \sem{|i2|} →
+ \sem{e1 â\96¹ i2} â\89\90 \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
#S * #i1 #b1 #i2 cases b1
[2:#th >odot_false >sem_pre_false >sem_pre_false >sem_cat /2/
|#H >odot_true >sem_pre_true @(eqP_trans … (sem_pre_concat_r …))
@eqP_trans [|@eqP_sym @union_assoc ] /3/
]
qed.
-
-lemma sem_fst: ∀S.∀e:pre S. \sem{\fst e} =1 \sem{e}-{[ ]}.
-#S * #i *
- [>sem_pre_true normalize in ⊢ (??%?); #w %
- [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)]
- |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ]
- ]
-qed.
-
-lemma item_eps: ∀S.∀i:pitem S. \sem{i} =1 \sem{i}-{[ ]}.
-#S #i #w %
- [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) //
- |* //
- ]
-qed.
-lemma sem_fst_aux: ∀S.∀e:pre S.∀i:pitem S.∀A.
- \sem{e} =1 \sem{i} ∪ A → \sem{\fst e} =1 \sem{i} ∪ (A - {[ ]}).
+lemma minus_eps_pre_aux: ∀S.∀e:pre S.∀i:pitem S.∀A.
+ \sem{e} ≐ \sem{i} ∪ A → \sem{\fst e} ≐ \sem{i} ∪ (A - {[ ]}).
#S #e #i #A #seme
-@eqP_trans [|@sem_fst]
-@eqP_trans [||@eqP_union_r [|@eqP_sym @item_eps]]
+@eqP_trans [|@minus_eps_pre]
+@eqP_trans [||@eqP_union_r [|@eqP_sym @minus_eps_item]]
@eqP_trans [||@distribute_substract]
@eqP_substract_r //
qed.
-(* theorem 16: 1 *)
-theorem sem_bull: ∀S:DeqSet. ∀e:pitem S. \sem{•e} =1 \sem{e} ∪ \sem{|e|}.
+theorem sem_bull: ∀S:DeqSet. ∀i:pitem S. \sem{•i} ≐ \sem{i} ∪ \sem{|i|}.
#S #e elim e
[#w normalize % [/2/ | * //]
|/2/
|#x normalize #w % [ /2/ | * [@False_ind | //]]
|#x normalize #w % [ /2/ | * // ]
- |#i1 #i2 #IH1 #IH2 >eclose_dot
- @eqP_trans [|@odot_dot_aux //] >sem_cat
+ |#i1 #i2 #IH1 #IH2
+ (* lhs = \sem{•(i1 ·i2)} *)
+ >eclose_dot
+ (* lhs =\sem{•(i1) ▹ i2)} *)
+ @eqP_trans [|@odot_dot_aux //]
+ (* lhs = \sem{•(i1)·\sem{|i2|}∪\sem{i2} *)
@eqP_trans
[|@eqP_union_r
[|@eqP_trans [|@(cat_ext_l … IH1)] @distr_cat_r]]
+ (* lhs = \sem{i1}·\sem{|i2|}∪\sem{|i1|}·\sem{|i2|}∪\sem{i2} *)
@eqP_trans [|@union_assoc]
+ (* lhs = \sem{i1}·\sem{|i2|}∪(\sem{|i1|}·\sem{|i2|}∪\sem{i2}) *)
+ (* Now we work on the rhs that is
+ rhs = \sem{i1·i2} ∪ \sem{|i1·i2|} *)
+ >sem_cat
+ (* rhs = \sem{i1}·\sem{|i2|} ∪ \sem{i2} ∪ \sem{|i1·i2|} *)
@eqP_trans [||@eqP_sym @union_assoc]
- @eqP_union_l //
+ (* rhs = \sem{i1}·\sem{|i2|}∪ (\sem{i2} ∪ \sem{|i1·i2|}) *)
+ @eqP_union_l @union_comm
|#i1 #i2 #IH1 #IH2 >eclose_plus
@eqP_trans [|@sem_oplus] >sem_plus >erase_plus
@eqP_trans [|@(eqP_union_l … IH2)]
@eqP_trans [||@eqP_union_l [|@union_comm]]
@eqP_trans [||@union_assoc] /2/
|#i #H >sem_pre_true >sem_star >erase_bull >sem_star
- @eqP_trans [|@eqP_union_r [|@cat_ext_l [|@sem_fst_aux //]]]
+ @eqP_trans [|@eqP_union_r [|@cat_ext_l [|@minus_eps_pre_aux //]]]
@eqP_trans [|@eqP_union_r [|@distr_cat_r]]
@eqP_trans [|@union_assoc] @eqP_union_l >erase_star
@eqP_sym @star_fix_eps
]
qed.
+(*
+Blank item
+
+As a corollary of theorem sem_bullet, given a regular expression e, we can easily
+find an item with the same semantics of $e$: it is enough to get an item (blank e)
+having e as carrier and no point, and then broadcast a point in it. The semantics of
+(blank e) is obviously the empty language: from the point of view of the automaton,
+it corresponds with the pit state. *)
+
+let rec blank (S: DeqSet) (i: re S) on i :pitem S ≝
+ match i with
+ [ z ⇒ `∅
+ | e ⇒ ϵ
+ | s y ⇒ `y
+ | o e1 e2 ⇒ (blank S e1) + (blank S e2)
+ | c e1 e2 ⇒ (blank S e1) · (blank S e2)
+ | k e ⇒ (blank S e)^* ].
+
+lemma forget_blank: ∀S.∀e:re S.|blank S e| = e.
+#S #e elim e normalize //
+qed.
+
+lemma sem_blank: ∀S.∀e:re S.\sem{blank S e} ≐ ∅.
+#S #e elim e
+ [1,2:@eq_to_ex_eq //
+ |#s @eq_to_ex_eq //
+ |#e1 #e2 #Hind1 #Hind2 >sem_cat
+ @eqP_trans [||@(union_empty_r … ∅)]
+ @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r
+ @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind1
+ |#e1 #e2 #Hind1 #Hind2 >sem_plus
+ @eqP_trans [||@(union_empty_r … ∅)]
+ @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r @Hind1
+ |#e #Hind >sem_star
+ @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind
+ ]
+qed.
+
+theorem re_embedding: ∀S.∀e:re S.
+ \sem{•(blank S e)} ≐ \sem{e}.
+#S #e @eqP_trans [|@sem_bull] >forget_blank
+@eqP_trans [|@eqP_union_r [|@sem_blank]]
+@eqP_trans [|@union_comm] @union_empty_r.
+qed.
+
+(*
+Lifted Operators
+
+Plus and bullet have been already lifted from items to pres. We can now
+do a similar job for concatenation ⊙ and Kleene's star ⊛. *)
+
definition lifted_cat ≝ λS:DeqSet.λe:pre S.
- lift S (lc S eclose e).
+ lift S (pre_concat_l S eclose e).
notation "e1 ⊙ e2" left associative with precedence 70 for @{'odot $e1 $e2}.
#S * #i1 * * #i2 #b2 // >odot_true_b //
qed.
+(* Let us come to the star operation: *)
+
+definition lk ≝ λS:DeqSet.λe:pre S.
+ match e with
+ [ mk_Prod i1 b1 ⇒
+ match b1 with
+ [true ⇒ 〈(\fst (eclose ? i1))^*, true〉
+ |false ⇒ 〈i1^*,false〉
+ ]
+ ].
+
+(* notation < "a \sup ⊛" non associative with precedence 90 for @{'lk $a}.*)
+interpretation "lk" 'lk a = (lk ? a).
+notation "a^⊛" non associative with precedence 90 for @{'lk $a}.
+
+
lemma ostar_true: ∀S.∀i:pitem S.
〈i,true〉^⊛ = 〈(\fst (•i))^*, true〉.
// qed.
#S * #i * // qed.
lemma sem_odot_true: ∀S:DeqSet.∀e1:pre S.∀i.
- \sem{e1 ⊙ 〈i,true〉} =1 \sem{e1 ▸ i} ∪ { [ ] }.
+ \sem{e1 ⊙ 〈i,true〉} ≐ \sem{e1 ▹ i} ∪ { [ ] }.
#S #e1 #i
-cut (e1 â\8a\99 â\8c©i,trueâ\8cª = â\8c©\fst (e1 â\96¸ i), \snd(e1 â\96¸ i) ∨ true〉) [//]
-#H >H cases (e1 â\96¸ i) #i1 #b1 cases b1
+cut (e1 â\8a\99 â\8c©i,trueâ\8cª = â\8c©\fst (e1 â\96¹ i), \snd(e1 â\96¹ i) ∨ true〉) [//]
+#H >H cases (e1 â\96¹ i) #i1 #b1 cases b1
[>sem_pre_true @eqP_trans [||@eqP_sym @union_assoc]
@eqP_union_l /2/
|/2/
qed.
lemma eq_odot_false: ∀S:DeqSet.∀e1:pre S.∀i.
- e1 â\8a\99 â\8c©i,falseâ\8cª = e1 â\96¸ i.
+ e1 â\8a\99 â\8c©i,falseâ\8cª = e1 â\96¹ i.
#S #e1 #i
-cut (e1 â\8a\99 â\8c©i,falseâ\8cª = â\8c©\fst (e1 â\96¸ i), \snd(e1 â\96¸ i) ∨ false〉) [//]
-cases (e1 â\96¸ i) #i1 #b1 cases b1 #H @H
+cut (e1 â\8a\99 â\8c©i,falseâ\8cª = â\8c©\fst (e1 â\96¹ i), \snd(e1 â\96¹ i) ∨ false〉) [//]
+cases (e1 â\96¹ i) #i1 #b1 cases b1 #H @H
qed.
+(* We conclude this section with the proof of the main semantic properties
+of ⊙ and ⊛. *)
+
lemma sem_odot:
- ∀S.∀e1,e2: pre S. \sem{e1 ⊙ e2} =1 \sem{e1}· \sem{|\fst e2|} ∪ \sem{e2}.
+ ∀S.∀e1,e2: pre S. \sem{e1 ⊙ e2} ≐ \sem{e1}· \sem{|\fst e2|} ∪ \sem{e2}.
#S #e1 * #i2 *
[>sem_pre_true
@eqP_trans [|@sem_odot_true]
|>sem_pre_false >eq_odot_false @odot_dot_aux //
]
qed.
-
-(* theorem 16: 4 *)
+
theorem sem_ostar: ∀S.∀e:pre S.
- \sem{e^⊛} =1 \sem{e} · \sem{|\fst e|}^*.
+ \sem{e^⊛} ≐ \sem{e} · \sem{|\fst e|}^*.
#S * #i #b cases b
- [>sem_pre_true >sem_pre_true >sem_star >erase_bull
- @eqP_trans [|@eqP_union_r[|@cat_ext_l [|@sem_fst_aux //]]]
+ [(* lhs = \sem{〈i,true〉^⊛} *)
+ >sem_pre_true (* >sem_pre_true *)
+ (* lhs = \sem{(\fst (•i))^*}∪{ϵ} *)
+ >sem_star >erase_bull
+ (* lhs = \sem{\fst (•i)}·(\sem{|i|)^*∪{ϵ} *)
+ @eqP_trans [|@eqP_union_r[|@cat_ext_l [|@minus_eps_pre_aux //]]]
+ (* lhs = (\sem{i}∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
@eqP_trans [|@eqP_union_r [|@distr_cat_r]]
+ (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
+ @eqP_trans [|@union_assoc]
+ (* lhs = (\sem{i}·(\sem{|i|)^*∪((\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ}) *)
+ @eqP_trans [|@eqP_union_l[|@eqP_sym @star_fix_eps]]
+ (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|)^* *)
+ (* now we work on the right hand side, that is
+ rhs = \sem{〈i,true〉}·(\sem{|i|}^* *)
@eqP_trans [||@eqP_sym @distr_cat_r]
- @eqP_trans [|@union_assoc] @eqP_union_l
- @eqP_trans [||@eqP_sym @epsilon_cat_l] @eqP_sym @star_fix_eps
+ (* rhs = (\sem{i}·(\sem{|i|)^*∪{ϵ}·(\sem{|i|)^* *)
+ @eqP_union_l @eqP_sym @epsilon_cat_l
|>sem_pre_false >sem_pre_false >sem_star /2/
]
qed.
-
+