matita/matita/lib/re/re.ma

   1 (**************************************************************************)
   2 (*       ___                                                              *)
   3 (*      ||M||                                                             *)
   4 (*      ||A||       A project by Andrea Asperti                           *)
   5 (*      ||T||                                                             *)
   6 (*      ||I||       Developers:                                           *)
   7 (*      ||T||         The HELM team.                                      *)
   8 (*      ||A||         http://helm.cs.unibo.it                             *)
   9 (*      \   /                                                             *)
  10 (*       \ /        This file is distributed under the terms of the       *)
  11 (*        v         GNU General Public License Version 2                  *)
  12 (*                                                                        *)
  13 (**************************************************************************)
  14
  15 include "re/lang.ma".
  16
  17 (* The type re of regular expressions over an alphabet $S$ is the smallest
  18 collection of objects generated by the following constructors: *)
  19
  20 inductive re (S: DeqSet) : Type[0] ≝
  21    z: re S
  22  | e: re S
  23  | s: S → re S
  24  | c: re S → re S → re S
  25  | o: re S → re S → re S
  26  | k: re S → re S.
  27
  28 interpretation "re epsilon" 'epsilon = (e ?).
  29 interpretation "re or" 'plus a b = (o ? a b).
  30 interpretation "re cat" 'middot a b = (c ? a b).
  31 interpretation "re star" 'star a = (k ? a).
  32
  33 notation < "a" non associative with precedence 90 for @{ 'ps $a}.
  34 notation > "` term 90 a" non associative with precedence 90 for @{ 'ps $a}.
  35 interpretation "atom" 'ps a = (s ? a).
  36
  37 notation "`∅" non associative with precedence 90 for @{ 'empty }.
  38 interpretation "empty" 'empty = (z ?).
  39
  40 (* The language sem{e} associated with the regular expression e is inductively
  41 defined by the following function: *)
  42
  43 let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝
  44 match r with
  45 [ z ⇒ ∅
  46 | e ⇒ {ϵ}
  47 | s x ⇒ {[x]}
  48 | c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2)
  49 | o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2)
  50 | k r1 ⇒ (in_l ? r1) ^*].
  51
  52 notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}.
  53 interpretation "in_l" 'in_l E = (in_l ? E).
  54 interpretation "in_l mem" 'mem w l = (in_l ? l w).
  55
  56 lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*.
  57 // qed.
  58
  59 (*
  60 Pointed Regular expressions
  61
  62 We now introduce pointed regular expressions, that are the main tool we shall
  63 use for the construction of the automaton.
  64 A pointed regular expression is just a regular expression internally labelled
  65 with some additional points. Intuitively, points mark the positions inside the
  66 regular expression which have been reached after reading some prefix of
  67 the input string, or better the positions where the processing of the remaining
  68 string has to be started. Each pointed expression for $e$ represents a state of
  69 the {\em deterministic} automaton associated with $e$; since we obviously have
  70 only a finite number of possible labellings, the number of states of the automaton
  71 is finite.
  72
  73 Pointed regular expressions provide the tool for an algebraic revisitation of
  74 McNaughton and Yamada's algorithm for position automata, making the proof of its
  75 correctness, that is far from trivial, particularly clear and simple. In particular,
  76 pointed expressions offer an appealing alternative to Brzozowski's derivatives,
  77 avoiding their weakest point, namely the fact of being forced to quotient derivatives
  78 w.r.t. a suitable notion of equivalence in order to get a finite number of states
  79 (that is not essential for recognizing strings, but is crucial for comparing regular
  80 expressions).
  81
  82 Our main data structure is the notion of pointed item, that is meant whose purpose
  83 is to encode a set of positions inside a regular expression.
  84 The idea of formalizing pointers inside a data type by means of a labelled version
  85 of the data type itself is probably one of the first, major lessons learned in the
  86 formalization of the metatheory of programming languages. For our purposes, it is
  87 enough to mark positions preceding individual characters, so we shall have two kinds
  88 of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)
  89
  90 inductive pitem (S: DeqSet) : Type[0] ≝
  91    pz: pitem S
  92  | pe: pitem S
  93  | ps: S → pitem S
  94  | pp: S → pitem S
  95  | pc: pitem S → pitem S → pitem S
  96  | po: pitem S → pitem S → pitem S
  97  | pk: pitem S → pitem S.
  98
  99 (* A pointed regular expression (pre) is just a pointed item with an additional
 100 boolean, that must be understood as the possibility to have a trailing point at
 101 the end of the expression. As we shall see, pointed regular expressions can be
 102 understood as states of a DFA, and the boolean indicates if
 103 the state is final or not. *)
 104
 105 definition pre ≝ λS.pitem S × bool.
 106
 107 interpretation "pitem star" 'star a = (pk ? a).
 108 interpretation "pitem or" 'plus a b = (po ? a b).
 109 interpretation "pitem cat" 'middot a b = (pc ? a b).
 110 notation < ".a" non associative with precedence 90 for @{ 'pp $a}.
 111 notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}.
 112 interpretation "pitem pp" 'pp a = (pp ? a).
 113 interpretation "pitem ps" 'ps a = (ps ? a).
 114 interpretation "pitem epsilon" 'epsilon = (pe ?).
 115 interpretation "pitem empty" 'empty = (pz ?).
 116
 117 (* The carrier $|i|$ of an item i is the regular expression obtained from i by
 118 removing all the points. Similarly, the carrier of a pointed regular expression
 119 is the carrier of its item. *)
 120
 121 let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
 122  match l with
 123   [ pz ⇒ `∅
 124   | pe ⇒ ϵ
 125   | ps x ⇒ `x
 126   | pp x ⇒ `x
 127   | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
 128   | po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
 129   | pk E ⇒ (forget ? E)^* ].
 130
 131 interpretation "forget" 'card a = (forget ? a).
 132
 133 lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|).
 134 // qed.
 135
 136 lemma erase_plus : ∀S.∀i1,i2:pitem S.
 137   |i1 + i2| = |i1| + |i2|.
 138 // qed.
 139
 140 lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*.
 141 // qed.
 142
 143 (*
 144 Comparing items and pres
 145
 146 Items and pres are very concrete datatypes: they can be effectively compared,
 147 and enumerated. In particular, we can define a boolean equality beqitem and a proof
 148 beqitem_true that it refects propositional equality, enriching the set (pitem S)
 149 to a DeqSet. *)
 150
 151 let rec beqitem S (i1,i2: pitem S) on i1 ≝
 152   match i1 with
 153   [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false]
 154   | pe ⇒ match i2 with [ pe ⇒ true | _ ⇒ false]
 155   | ps y1 ⇒ match i2 with [ ps y2 ⇒ y1==y2 | _ ⇒ false]
 156   | pp y1 ⇒ match i2 with [ pp y2 ⇒ y1==y2 | _ ⇒ false]
 157   | po i11 i12 ⇒ match i2 with
 158     [ po i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
 159     | _ ⇒ false]
 160   | pc i11 i12 ⇒ match i2 with
 161     [ pc i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
 162     | _ ⇒ false]
 163   | pk i11 ⇒ match i2 with [ pk i21 ⇒ beqitem S i11 i21 | _ ⇒ false]
 164   ].
 165
 166 lemma beqitem_true: ∀S,i1,i2. iff (beqitem S i1 i2 = true) (i1 = i2).
 167 #S #i1 elim i1
 168   [#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
 169   |#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
 170   |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
 171     [>(\P H) // | @(\b (refl …))]
 172   |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
 173     [>(\P H) // | @(\b (refl …))]
 174   |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
 175    normalize #H destruct
 176     [cases (true_or_false (beqitem S i11 i21)) #H1
 177       [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
 178       |>H1 in H; normalize #abs @False_ind /2/
 179       ]
 180     |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
 181     ]
 182   |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
 183    normalize #H destruct
 184     [cases (true_or_false (beqitem S i11 i21)) #H1
 185       [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
 186       |>H1 in H; normalize #abs @False_ind /2/
 187       ]
 188     |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
 189     ]
 190   |#i3 #Hind #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i4] %
 191    normalize #H destruct
 192     [>(proj1 … (Hind i4) H) // |>(proj2 … (Hind i4) (refl …)) //]
 193   ]
 194 qed.
 195
 196 definition DeqItem ≝ λS.
 197   mk_DeqSet (pitem S) (beqitem S) (beqitem_true S).
 198
 199 (* We also add a couple of unification hints to allow the type inference system
 200 to look at (pitem S) as the carrier of a DeqSet, and at beqitem as if it was the
 201 equality function of a DeqSet. *)
 202
 203 unification hint  0 ≔ S;
 204     X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
 205 (* ---------------------------------------- *) ⊢
 206     pitem S ≡ carr X.
 207
 208 unification hint  0 ≔ S,i1,i2;
 209     X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
 210 (* ---------------------------------------- *) ⊢
 211     beqitem S i1 i2 ≡ eqb X i1 i2.
 212
 213 (*
 214 Semantics of pointed regular expressions
 215
 216 The intuitive semantic of a point is to mark the position where
 217 we should start reading the regular expression. The language associated
 218 to a pre is the union of the languages associated with its points. *)
 219
 220 let rec in_pl (S : DeqSet) (r : pitem S) on r : word S → Prop ≝
 221 match r with
 222 [ pz ⇒ ∅
 223 | pe ⇒ ∅
 224 | ps _ ⇒ ∅
 225 | pp x ⇒ { [x] }
 226 | pc r1 r2 ⇒ (in_pl ? r1) · \sem{forget ? r2} ∪ (in_pl ? r2)
 227 | po r1 r2 ⇒ (in_pl ? r1) ∪ (in_pl ? r2)
 228 | pk r1 ⇒ (in_pl ? r1) · \sem{forget ? r1}^*  ].
 229
 230 interpretation "in_pl" 'in_l E = (in_pl ? E).
 231 interpretation "in_pl mem" 'mem w l = (in_pl ? l w).
 232
 233 definition in_prl ≝ λS : DeqSet.λp:pre S.
 234   if (\snd p) then \sem{\fst p} ∪ {ϵ} else \sem{\fst p}.
 235
 236 interpretation "in_prl mem" 'mem w l = (in_prl ? l w).
 237 interpretation "in_prl" 'in_l E = (in_prl ? E).
 238
 239 (* The following, trivial lemmas are only meant for rewriting purposes. *)
 240
 241 lemma sem_pre_true : ∀S.∀i:pitem S.
 242   \sem{〈i,true〉} = \sem{i} ∪ {ϵ}.
 243 // qed.
 244
 245 lemma sem_pre_false : ∀S.∀i:pitem S.
 246   \sem{〈i,false〉} = \sem{i}.
 247 // qed.
 248
 249 lemma sem_cat: ∀S.∀i1,i2:pitem S.
 250   \sem{i1 · i2} = \sem{i1} · \sem{|i2|} ∪ \sem{i2}.
 251 // qed.
 252
 253 lemma sem_cat_w: ∀S.∀i1,i2:pitem S.∀w.
 254   \sem{i1 · i2} w = ((\sem{i1} · \sem{|i2|}) w ∨ \sem{i2} w).
 255 // qed.
 256
 257 lemma sem_plus: ∀S.∀i1,i2:pitem S.
 258   \sem{i1 + i2} = \sem{i1} ∪ \sem{i2}.
 259 // qed.
 260
 261 lemma sem_plus_w: ∀S.∀i1,i2:pitem S.∀w.
 262   \sem{i1 + i2} w = (\sem{i1} w ∨ \sem{i2} w).
 263 // qed.
 264
 265 lemma sem_star : ∀S.∀i:pitem S.
 266   \sem{i^*} = \sem{i} · \sem{|i|}^*.
 267 // qed.
 268
 269 lemma sem_star_w : ∀S.∀i:pitem S.∀w.
 270   \sem{i^*} w = (∃w1,w2.w1 @ w2 = w ∧ \sem{i} w1 ∧ \sem{|i|}^* w2).
 271 // qed.
 272
 273 (* Below are a few, simple, semantic properties of items. In particular:
 274 - not_epsilon_item : ∀S:DeqSet.∀i:pitem S. ¬ (\sem{i} ϵ).
 275 - epsilon_pre : ∀S.∀e:pre S. (\sem{i} ϵ) ↔ (\snd e = true).
 276 - minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
 277 - minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
 278 The first property is proved by a simple induction on $i$; the other
 279 results are easy corollaries. We need an auxiliary lemma first. *)
 280
 281 lemma append_eq_nil : ∀S.∀w1,w2:word S. w1 @ w2 = ϵ → w1 = ϵ.
 282 #S #w1 #w2 cases w1 // #a #tl normalize #H destruct qed.
 283
 284 lemma not_epsilon_lp : ∀S:DeqSet.∀e:pitem S. ¬ (ϵ ∈ e).
 285 #S #e elim e normalize /2/
 286   [#r1 #r2 * #n1 #n2 % * /2/ * #w1 * #w2 * * #H
 287    >(append_eq_nil …H…) /2/
 288   |#r1 #r2 #n1 #n2 % * /2/
 289   |#r #n % * #w1 * #w2 * * #H >(append_eq_nil …H…) /2/
 290   ]
 291 qed.
 292
 293 lemma epsilon_to_true : ∀S.∀e:pre S. ϵ ∈ e → \snd e = true.
 294 #S * #i #b cases b // normalize #H @False_ind /2/
 295 qed.
 296
 297 lemma true_to_epsilon : ∀S.∀e:pre S. \snd e = true → ϵ ∈ e.
 298 #S * #i #b #btrue normalize in btrue; >btrue %2 //
 299 qed.
 300
 301 lemma minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
 302 #S #i #w %
 303   [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) //
 304   |* //
 305   ]
 306 qed.
 307
 308 lemma minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
 309 #S * #i *
 310   [>sem_pre_true normalize in ⊢ (??%?); #w %
 311     [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)]
 312   |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ]
 313   ]
 314 qed.
 315
 316 (*
 317 Broadcasting points
 318
 319 Intuitively, a regular expression e must be understood as a pointed expression with a single
 320 point in front of it. Since however we only allow points before symbols, we must broadcast
 321 this initial point inside e traversing all nullable subexpressions, that essentially corresponds
 322 to the ϵ-closure operation on automata. We use the notation •(_) to denote such an operation;
 323 its definition is the expected one: let us start discussing an example.
 324
 325 Example
 326 Let us broadcast a point inside (a + ϵ)(b*a + b)b. We start working in parallel on the
 327 first occurrence of a (where the point stops), and on ϵ that gets traversed. We have hence
 328 reached the end of a + ϵ and we must pursue broadcasting inside (b*a + b)b. Again, we work in
 329 parallel on the two additive subterms b^*a and b; the first point is allowed to both enter the
 330 star, and to traverse it, stopping in front of a; the second point just stops in front of b.
 331 No point reached that end of b^*a + b hence no further propagation is possible. In conclusion:
 332                •((a + ϵ)(b^*a + b)b) = 〈(•a + ϵ)((•b)^*•a + •b)b, false〉
 333 *)
 334
 335 (* Broadcasting a point inside an item generates a pre, since the point could possibly reach
 336 the end of the expression.
 337 Broadcasting inside a i1+i2 amounts to broadcast in parallel inside i1 and i2.
 338 If we define
 339                  〈i1,b1〉 ⊕ 〈i2,b2〉 = 〈i1 + i2, b1 ∨ b2〉
 340 then, we just have •(i1+i2) = •(i1)⊕ •(i2).
 341 *)
 342
 343 definition lo ≝ λS:DeqSet.λa,b:pre S.〈\fst a + \fst b,\snd a ∨ \snd b〉.
 344 notation "a ⊕ b" left associative with precedence 65 for @{'oplus $a $b}.
 345 interpretation "oplus" 'oplus a b = (lo ? a b).
 346
 347 lemma lo_def: ∀S.∀i1,i2:pitem S.∀b1,b2. 〈i1,b1〉⊕〈i2,b2〉=〈i1+i2,b1∨b2〉.
 348 // qed.
 349
 350 (*
 351 Concatenation is a bit more complex. In order to broadcast a point inside i1 · i2
 352 we should start broadcasting it inside i1 and then proceed into i2 if and only if a
 353 point reached the end of i1. This suggests to define •(i1 · i2) as •(i1) ▹ i2, where
 354 e ▹ i is a general operation of concatenation between a pre and an item, defined by
 355 cases on the boolean in e:
 356
 357        〈i1,true〉 ▹ i2  = i1 ◃ •(i_2)
 358        〈i1,false〉 ▹ i2 = i1 · i2
 359
 360 In turn, ◃ says how to concatenate an item with a pre, that is however extremely simple:
 361
 362         i1 ◃ 〈i1,b〉  = 〈i_1 · i2, b〉
 363
 364 Let us come to the formalized definitions:
 365 *)
 366
 367 definition pre_concat_r ≝ λS:DeqSet.λi:pitem S.λe:pre S.
 368   match e with [ mk_Prod i1 b ⇒ 〈i · i1, b〉].
 369
 370 notation "i ◃ e" left associative with precedence 65 for @{'lhd $i $e}.
 371 interpretation "pre_concat_r" 'lhd i e = (pre_concat_r ? i e).
 372
 373 (* The behaviour of ◃ is summarized by the following, easy lemma: *)
 374
 375 lemma eq_to_ex_eq: ∀S.∀A,B:word S → Prop.
 376   A = B → A ≐ B.
 377 #S #A #B #H >H /2/ qed.
 378
 379 lemma sem_pre_concat_r : ∀S,i.∀e:pre S.
 380   \sem{i ◃ e} ≐ \sem{i} · \sem{|\fst e|} ∪ \sem{e}.
 381 #S #i * #i1 #b1 cases b1 [2: @eq_to_ex_eq //]
 382 >sem_pre_true >sem_cat >sem_pre_true /2/
 383 qed.
 384
 385 (* The definition of $•(-)$ (eclose) and ▹ (pre_concat_l) are mutually recursive.
 386 In this situation, a viable alternative that is usually simpler to reason about,
 387 is to abstract one of the two functions with respect to the other. In particular
 388 we abstract pre_concat_l with respect to an input bcast function from items to
 389 pres. *)
 390
 391 definition pre_concat_l ≝ λS:DeqSet.λbcast:∀S:DeqSet.pitem S → pre S.λe1:pre S.λi2:pitem S.
 392   match e1 with
 393   [ mk_Prod i1 b1 ⇒ match b1 with
 394     [ true ⇒ (i1 ◃ (bcast ? i2))
 395     | false ⇒ 〈i1 · i2,false〉
 396     ]
 397   ].
 398
 399 notation "a ▹ b" left associative with precedence 65 for @{'tril eclose $a $b}.
 400 interpretation "item-pre concat" 'tril op a b = (pre_concat_l ? op a b).
 401
 402 notation "•" non associative with precedence 65 for @{eclose ?}.
 403
 404 (* We are ready to give the formal definition of the broadcasting operation. *)
 405
 406 let rec eclose (S: DeqSet) (i: pitem S) on i : pre S ≝
 407  match i with
 408   [ pz ⇒ 〈 `∅, false 〉
 409   | pe ⇒ 〈 ϵ,  true 〉
 410   | ps x ⇒ 〈 `.x, false〉
 411   | pp x ⇒ 〈 `.x, false 〉
 412   | po i1 i2 ⇒ •i1 ⊕ •i2
 413   | pc i1 i2 ⇒ •i1 ▹ i2
 414   | pk i ⇒ 〈(\fst (•i))^*,true〉].
 415
 416 notation "• x" non associative with precedence 65 for @{'eclose $x}.
 417 interpretation "eclose" 'eclose x = (eclose ? x).
 418
 419 (* Here are a few simple properties of ▹ and •(-) *)
 420
 421 lemma eclose_plus: ∀S:DeqSet.∀i1,i2:pitem S.
 422   •(i1 + i2) = •i1 ⊕ •i2.
 423 // qed.
 424
 425 lemma eclose_dot: ∀S:DeqSet.∀i1,i2:pitem S.
 426   •(i1 · i2) = •i1 ▹ i2.
 427 // qed.
 428
 429 lemma eclose_star: ∀S:DeqSet.∀i:pitem S.
 430   •i^* = 〈(\fst(•i))^*,true〉.
 431 // qed.
 432
 433 lemma odot_true :
 434   ∀S.∀i1,i2:pitem S.
 435   〈i1,true〉 ▹ i2 = i1 ◃ (•i2).
 436 // qed.
 437
 438 lemma odot_true_bis :
 439   ∀S.∀i1,i2:pitem S.
 440   〈i1,true〉 ▹ i2 = 〈i1 · \fst (•i2), \snd (•i2)〉.
 441 #S #i1 #i2 normalize cases (•i2) // qed.
 442
 443 lemma odot_false:
 444   ∀S.∀i1,i2:pitem S.
 445   〈i1,false〉 ▹ i2 = 〈i1 · i2, false〉.
 446 // qed.
 447
 448 (* The definition of •(-) (eclose) can then be lifted from items to pres
 449 in the obvious way. *)
 450
 451 definition lift ≝ λS.λf:pitem S →pre S.λe:pre S.
 452   match e with
 453   [ mk_Prod i b ⇒ 〈\fst (f i), \snd (f i) ∨ b〉].
 454
 455 definition preclose ≝ λS. lift S (eclose S).
 456 interpretation "preclose" 'eclose x = (preclose ? x).
 457
 458 (* Obviously, broadcasting does not change the carrier of the item,
 459 as it is easily proved by structural induction. *)
 460
 461 lemma erase_bull : ∀S.∀i:pitem S. |\fst (•i)| = |i|.
 462 #S #i elim i //
 463   [ #i1 #i2 #IH1 #IH2 >erase_dot <IH1 >eclose_dot
 464     cases (•i1) #i11 #b1 cases b1 // <IH2 >odot_true_bis //
 465   | #i1 #i2 #IH1 #IH2 >eclose_plus >(erase_plus … i1) <IH1 <IH2
 466     cases (•i1) #i11 #b1 cases (•i2) #i21 #b2 //
 467   | #i #IH >eclose_star >(erase_star … i) <IH cases (•i) //
 468   ]
 469 qed.
 470
 471 (* We are now ready to state the main semantic properties of ⊕, ◃ and •(-):
 472
 473 sem_oplus:     \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}
 474 sem_pcl:       \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}
 475 sem_bullet     \sem{•i} ≐ \sem{i} ∪ \sem{|i|}
 476
 477 The proof of sem_oplus is straightforward. *)
 478
 479 lemma sem_oplus: ∀S:DeqSet.∀e1,e2:pre S.
 480   \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}.
 481 #S * #i1 #b1 * #i2 #b2 #w %
 482   [cases b1 cases b2 normalize /2/ * /3/ * /3/
 483   |cases b1 cases b2 normalize /2/ * /3/ * /3/
 484   ]
 485 qed.
 486
 487 (* For the others, we proceed as follow: we first prove the following
 488 auxiliary lemma, that assumes sem_bullet:
 489
 490 sem_pcl_aux:
 491    \sem{•i2} ≐  \sem{i2} ∪ \sem{|i2|} →
 492    \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
 493
 494 Then, using the previous result, we prove sem_bullet by induction
 495 on i. Finally, sem_pcl_aux and sem_bullet give sem_pcl. *)
 496
 497 lemma LcatE : ∀S.∀e1,e2:pitem S.
 498   \sem{e1 · e2} = \sem{e1} · \sem{|e2|} ∪ \sem{e2}.
 499 // qed.
 500
 501 lemma odot_dot_aux : ∀S.∀e1:pre S.∀i2:pitem S.
 502    \sem{•i2} ≐  \sem{i2} ∪ \sem{|i2|} →
 503    \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
 504 #S * #i1 #b1 #i2 cases b1
 505   [2:#th >odot_false >sem_pre_false >sem_pre_false >sem_cat /2/
 506   |#H >odot_true >sem_pre_true @(eqP_trans … (sem_pre_concat_r …))
 507    >erase_bull @eqP_trans [|@(eqP_union_l … H)]
 508     @eqP_trans [|@eqP_union_l[|@union_comm ]]
 509     @eqP_trans [|@eqP_sym @union_assoc ] /3/
 510   ]
 511 qed.
 512
 513 lemma minus_eps_pre_aux: ∀S.∀e:pre S.∀i:pitem S.∀A.
 514  \sem{e} ≐ \sem{i} ∪ A → \sem{\fst e} ≐ \sem{i} ∪ (A - {[ ]}).
 515 #S #e #i #A #seme
 516 @eqP_trans [|@minus_eps_pre]
 517 @eqP_trans [||@eqP_union_r [|@eqP_sym @minus_eps_item]]
 518 @eqP_trans [||@distribute_substract]
 519 @eqP_substract_r //
 520 qed.
 521
 522 theorem sem_bull: ∀S:DeqSet. ∀i:pitem S.  \sem{•i} ≐ \sem{i} ∪ \sem{|i|}.
 523 #S #e elim e
 524   [#w normalize % [/2/ | * //]
 525   |/2/
 526   |#x normalize #w % [ /2/ | * [@False_ind | //]]
 527   |#x normalize #w % [ /2/ | * // ]
 528   |#i1 #i2 #IH1 #IH2
 529    (* lhs = \sem{•(i1 ·i2)} *)
 530    >eclose_dot
 531    (* lhs =\sem{•(i1) ▹ i2)} *)
 532    @eqP_trans [|@odot_dot_aux //]
 533    (* lhs = \sem{•(i1)·\sem{|i2|}∪\sem{i2} *)
 534    @eqP_trans
 535      [|@eqP_union_r
 536        [|@eqP_trans [|@(cat_ext_l … IH1)] @distr_cat_r]]
 537    (* lhs = \sem{i1}·\sem{|i2|}∪\sem{|i1|}·\sem{|i2|}∪\sem{i2} *)
 538    @eqP_trans [|@union_assoc]
 539    (* lhs = \sem{i1}·\sem{|i2|}∪(\sem{|i1|}·\sem{|i2|}∪\sem{i2}) *)
 540    (* Now we work on the rhs that is
 541       rhs = \sem{i1·i2} ∪ \sem{|i1·i2|} *)
 542    >sem_cat
 543    (* rhs = \sem{i1}·\sem{|i2|} ∪ \sem{i2} ∪ \sem{|i1·i2|} *)
 544    @eqP_trans [||@eqP_sym @union_assoc]
 545    (* rhs = \sem{i1}·\sem{|i2|}∪ (\sem{i2} ∪ \sem{|i1·i2|}) *)
 546    @eqP_union_l @union_comm
 547   |#i1 #i2 #IH1 #IH2 >eclose_plus
 548    @eqP_trans [|@sem_oplus] >sem_plus >erase_plus
 549    @eqP_trans [|@(eqP_union_l … IH2)]
 550    @eqP_trans [|@eqP_sym @union_assoc]
 551    @eqP_trans [||@union_assoc] @eqP_union_r
 552    @eqP_trans [||@eqP_sym @union_assoc]
 553    @eqP_trans [||@eqP_union_l [|@union_comm]]
 554    @eqP_trans [||@union_assoc] /2/
 555   |#i #H >sem_pre_true >sem_star >erase_bull >sem_star
 556    @eqP_trans [|@eqP_union_r [|@cat_ext_l [|@minus_eps_pre_aux //]]]
 557    @eqP_trans [|@eqP_union_r [|@distr_cat_r]]
 558    @eqP_trans [|@union_assoc] @eqP_union_l >erase_star
 559    @eqP_sym @star_fix_eps
 560   ]
 561 qed.
 562
 563 (*
 564 Blank item
 565
 566 As a corollary of theorem sem_bullet, given a regular expression e, we can easily
 567 find an item with the same semantics of $e$: it is enough to get an item (blank e)
 568 having e as carrier and no point, and then broadcast a point in it. The semantics of
 569 (blank e) is obviously the empty language: from the point of view of the automaton,
 570 it corresponds with the pit state. *)
 571
 572 let rec blank (S: DeqSet) (i: re S) on i :pitem S ≝
 573  match i with
 574   [ z ⇒ `∅
 575   | e ⇒ ϵ
 576   | s y ⇒ `y
 577   | o e1 e2 ⇒ (blank S e1) + (blank S e2)
 578   | c e1 e2 ⇒ (blank S e1) · (blank S e2)
 579   | k e ⇒ (blank S e)^* ].
 580
 581 lemma forget_blank: ∀S.∀e:re S.|blank S e| = e.
 582 #S #e elim e normalize //
 583 qed.
 584
 585 lemma sem_blank: ∀S.∀e:re S.\sem{blank S e} ≐ ∅.
 586 #S #e elim e
 587   [1,2:@eq_to_ex_eq //
 588   |#s @eq_to_ex_eq //
 589   |#e1 #e2 #Hind1 #Hind2 >sem_cat
 590    @eqP_trans [||@(union_empty_r … ∅)]
 591    @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r
 592    @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind1
 593   |#e1 #e2 #Hind1 #Hind2 >sem_plus
 594    @eqP_trans [||@(union_empty_r … ∅)]
 595    @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r @Hind1
 596   |#e #Hind >sem_star
 597    @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind
 598   ]
 599 qed.
 600
 601 theorem re_embedding: ∀S.∀e:re S.
 602   \sem{•(blank S e)} ≐ \sem{e}.
 603 #S #e @eqP_trans [|@sem_bull] >forget_blank
 604 @eqP_trans [|@eqP_union_r [|@sem_blank]]
 605 @eqP_trans [|@union_comm] @union_empty_r.
 606 qed.
 607
 608 (*
 609 Lifted Operators
 610
 611 Plus and bullet have been already lifted from items to pres. We can now
 612 do a similar job for concatenation ⊙ and Kleene's star ⊛. *)
 613
 614 definition lifted_cat ≝ λS:DeqSet.λe:pre S.
 615   lift S (pre_concat_l S eclose e).
 616
 617 notation "e1 ⊙ e2" left associative with precedence 70 for @{'odot $e1 $e2}.
 618
 619 interpretation "lifted cat" 'odot e1 e2 = (lifted_cat ? e1 e2).
 620
 621 lemma odot_true_b : ∀S.∀i1,i2:pitem S.∀b.
 622   〈i1,true〉 ⊙ 〈i2,b〉 = 〈i1 · (\fst (•i2)),\snd (•i2) ∨ b〉.
 623 #S #i1 #i2 #b normalize in ⊢ (??%?); cases (•i2) //
 624 qed.
 625
 626 lemma odot_false_b : ∀S.∀i1,i2:pitem S.∀b.
 627   〈i1,false〉 ⊙ 〈i2,b〉 = 〈i1 · i2 ,b〉.
 628 //
 629 qed.
 630
 631 lemma erase_odot:∀S.∀e1,e2:pre S.
 632   |\fst (e1 ⊙ e2)| = |\fst e1| · (|\fst e2|).
 633 #S * #i1 * * #i2 #b2 // >odot_true_b //
 634 qed.
 635
 636 (* Let us come to the star operation: *)
 637
 638 definition lk ≝ λS:DeqSet.λe:pre S.
 639   match e with
 640   [ mk_Prod i1 b1 ⇒
 641     match b1 with
 642     [true ⇒ 〈(\fst (eclose ? i1))^*, true〉
 643     |false ⇒ 〈i1^*,false〉
 644     ]
 645   ].
 646
 647 (* notation < "a \sup ⊛" non associative with precedence 90 for @{'lk $a}.*)
 648 interpretation "lk" 'lk a = (lk ? a).
 649 notation "a^⊛" non associative with precedence 90 for @{'lk $a}.
 650
 651
 652 lemma ostar_true: ∀S.∀i:pitem S.
 653   〈i,true〉^⊛ = 〈(\fst (•i))^*, true〉.
 654 // qed.
 655
 656 lemma ostar_false: ∀S.∀i:pitem S.
 657   〈i,false〉^⊛ = 〈i^*, false〉.
 658 // qed.
 659
 660 lemma erase_ostar: ∀S.∀e:pre S.
 661   |\fst (e^⊛)| = |\fst e|^*.
 662 #S * #i * // qed.
 663
 664 lemma sem_odot_true: ∀S:DeqSet.∀e1:pre S.∀i.
 665   \sem{e1 ⊙ 〈i,true〉} ≐ \sem{e1 ▹ i} ∪ { [ ] }.
 666 #S #e1 #i
 667 cut (e1 ⊙ 〈i,true〉 = 〈\fst (e1 ▹ i), \snd(e1 ▹ i) ∨ true〉) [//]
 668 #H >H cases (e1 ▹ i) #i1 #b1 cases b1
 669   [>sem_pre_true @eqP_trans [||@eqP_sym @union_assoc]
 670    @eqP_union_l /2/
 671   |/2/
 672   ]
 673 qed.
 674
 675 lemma eq_odot_false: ∀S:DeqSet.∀e1:pre S.∀i.
 676   e1 ⊙ 〈i,false〉 = e1 ▹ i.
 677 #S #e1 #i
 678 cut (e1 ⊙ 〈i,false〉 = 〈\fst (e1 ▹ i), \snd(e1 ▹ i) ∨ false〉) [//]
 679 cases (e1 ▹ i) #i1 #b1 cases b1 #H @H
 680 qed.
 681
 682 (* We conclude this section with the proof of the main semantic properties
 683 of ⊙ and ⊛. *)
 684
 685 lemma sem_odot:
 686   ∀S.∀e1,e2: pre S. \sem{e1 ⊙ e2} ≐ \sem{e1}· \sem{|\fst e2|} ∪ \sem{e2}.
 687 #S #e1 * #i2 *
 688   [>sem_pre_true
 689    @eqP_trans [|@sem_odot_true]
 690    @eqP_trans [||@union_assoc] @eqP_union_r @odot_dot_aux //
 691   |>sem_pre_false >eq_odot_false @odot_dot_aux //
 692   ]
 693 qed.
 694
 695 theorem sem_ostar: ∀S.∀e:pre S.
 696   \sem{e^⊛} ≐  \sem{e} · \sem{|\fst e|}^*.
 697 #S * #i #b cases b
 698   [(* lhs = \sem{〈i,true〉^⊛} *)
 699    >sem_pre_true (* >sem_pre_true *)
 700    (* lhs = \sem{(\fst (•i))^*}∪{ϵ} *)
 701    >sem_star >erase_bull
 702    (* lhs = \sem{\fst (•i)}·(\sem{|i|)^*∪{ϵ} *)
 703    @eqP_trans [|@eqP_union_r[|@cat_ext_l [|@minus_eps_pre_aux //]]]
 704    (* lhs = (\sem{i}∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
 705    @eqP_trans [|@eqP_union_r [|@distr_cat_r]]
 706    (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
 707    @eqP_trans [|@union_assoc]
 708    (* lhs = (\sem{i}·(\sem{|i|)^*∪((\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ}) *)
 709    @eqP_trans [|@eqP_union_l[|@eqP_sym @star_fix_eps]]
 710    (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|)^* *)
 711    (* now we work on the right hand side, that is
 712       rhs = \sem{〈i,true〉}·(\sem{|i|}^* *)
 713    @eqP_trans [||@eqP_sym @distr_cat_r]
 714    (* rhs = (\sem{i}·(\sem{|i|)^*∪{ϵ}·(\sem{|i|)^* *)
 715    @eqP_union_l @eqP_sym @epsilon_cat_l
 716   |>sem_pre_false >sem_pre_false >sem_star /2/
 717   ]
 718 qed.
 719