matita/matita/lib/re/re.ma

   1 (**************************************************************************)
   2 (*       ___                                                              *)
   3 (*      ||M||                                                             *)
   4 (*      ||A||       A project by Andrea Asperti                           *)
   5 (*      ||T||                                                             *)
   6 (*      ||I||       Developers:                                           *)
   7 (*      ||T||         The HELM team.                                      *)
   8 (*      ||A||         http://helm.cs.unibo.it                             *)
   9 (*      \   /                                                             *)
  10 (*       \ /        This file is distributed under the terms of the       *)
  11 (*        v         GNU General Public License Version 2                  *)
  12 (*                                                                        *)
  13 (**************************************************************************)
  14
  15 include "re/lang.ma".
  16
  17 (* The type re of regular expressions over an alphabet $S$ is the smallest
  18 collection of objects generated by the following constructors: *)
  19
  20 inductive re (S: DeqSet) : Type[0] ≝
  21    z: re S
  22  | e: re S
  23  | s: S → re S
  24  | c: re S → re S → re S
  25  | o: re S → re S → re S
  26  | k: re S → re S.
  27
  28 interpretation "re epsilon" 'epsilon = (e ?).
  29 interpretation "re or" 'plus a b = (o ? a b).
  30 interpretation "re cat" 'middot a b = (c ? a b).
  31 interpretation "re star" 'star a = (k ? a).
  32
  33 notation < "a" non associative with precedence 90 for @{ 'ps $a}.
  34 notation > "` term 90 a" non associative with precedence 90 for @{ 'ps $a}.
  35 interpretation "atom" 'ps a = (s ? a).
  36
  37 notation "`∅" non associative with precedence 90 for @{ 'empty }.
  38 interpretation "empty" 'empty = (z ?).
  39
  40 (* The language sem{e} associated with the regular expression e is inductively
  41 defined by the following function: *)
  42
  43 let rec in_l (S : DeqSet) (r : re S) on r : word S → Prop ≝
  44 match r with
  45 [ z ⇒ ∅
  46 | e ⇒ {ϵ}
  47 | s x ⇒ {[x]}
  48 | c r1 r2 ⇒ (in_l ? r1) · (in_l ? r2)
  49 | o r1 r2 ⇒ (in_l ? r1) ∪ (in_l ? r2)
  50 | k r1 ⇒ (in_l ? r1) ^*].
  51
  52 notation "\sem{term 19 E}" non associative with precedence 75 for @{'in_l $E}.
  53 interpretation "in_l" 'in_l E = (in_l ? E).
  54 interpretation "in_l mem" 'mem w l = (in_l ? l w).
  55
  56 lemma rsem_star : ∀S.∀r: re S. \sem{r^*} = \sem{r}^*.
  57 // qed.
  58
  59 (*
  60 Pointed Regular expressions
  61
  62 We now introduce pointed regular expressions, that are the main tool we shall
  63 use for the construction of the automaton.
  64 A pointed regular expression is just a regular expression internally labelled
  65 with some additional points. Intuitively, points mark the positions inside the
  66 regular expression which have been reached after reading some prefix of
  67 the input string, or better the positions where the processing of the remaining
  68 string has to be started. Each pointed expression for $e$ represents a state of
  69 the {\em deterministic} automaton associated with $e$; since we obviously have
  70 only a finite number of possible labellings, the number of states of the automaton
  71 is finite.
  72
  73 Pointed regular expressions provide the tool for an algebraic revisitation of
  74 McNaughton and Yamada's algorithm for position automata, making the proof of its
  75 correctness, that is far from trivial, particularly clear and simple. In particular,
  76 pointed expressions offer an appealing alternative to Brzozowski's derivatives,
  77 avoiding their weakest point, namely the fact of being forced to quotient derivatives
  78 w.r.t. a suitable notion of equivalence in order to get a finite number of states
  79 (that is not essential for recognizing strings, but is crucial for comparing regular
  80 expressions).
  81
  82 Our main data structure is the notion of pointed item, that is meant whose purpose
  83 is to encode a set of positions inside a regular expression.
  84 The idea of formalizing pointers inside a data type by means of a labelled version
  85 of the data type itself is probably one of the first, major lessons learned in the
  86 formalization of the metatheory of programming languages. For our purposes, it is
  87 enough to mark positions preceding individual characters, so we shall have two kinds
  88 of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)
  89
  90 inductive pitem (S: DeqSet) : Type[0] ≝
  91    pz: pitem S
  92  | pe: pitem S
  93  | ps: S → pitem S
  94  | pp: S → pitem S
  95  | pc: pitem S → pitem S → pitem S
  96  | po: pitem S → pitem S → pitem S
  97  | pk: pitem S → pitem S.
  98
  99 (* A pointed regular expression (pre) is just a pointed item with an additional
 100 boolean, that must be understood as the possibility to have a trailing point at
 101 the end of the expression. As we shall see, pointed regular expressions can be
 102 understood as states of a DFA, and the boolean indicates if
 103 the state is final or not. *)
 104
 105 definition pre ≝ λS.pitem S × bool.
 106
 107 interpretation "pitem star" 'star a = (pk ? a).
 108 interpretation "pitem or" 'plus a b = (po ? a b).
 109 interpretation "pitem cat" 'middot a b = (pc ? a b).
 110 notation < ".a" non associative with precedence 90 for @{ 'pp $a}.
 111 notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}.
 112 interpretation "pitem pp" 'pp a = (pp ? a).
 113 interpretation "pitem ps" 'ps a = (ps ? a).
 114 interpretation "pitem epsilon" 'epsilon = (pe ?).
 115 interpretation "pitem empty" 'empty = (pz ?).
 116
 117 (* The carrier $|i|$ of an item i is the regular expression obtained from i by
 118 removing all the points. Similarly, the carrier of a pointed regular expression
 119 is the carrier of its item. *)
 120
 121 let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
 122  match l with
 123   [ pz ⇒ `∅
 124   | pe ⇒ ϵ
 125   | ps x ⇒ `x
 126   | pp x ⇒ `x
 127   | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
 128   | po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
 129   | pk E ⇒ (forget ? E)^* ].
 130
 131 (* notation < "|term 19 e|" non associative with precedence 70 for @{'forget $e}.*)
 132 interpretation "forget" 'norm a = (forget ? a).
 133
 134 lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|).
 135 // qed.
 136
 137 lemma erase_plus : ∀S.∀i1,i2:pitem S.
 138   |i1 + i2| = |i1| + |i2|.
 139 // qed.
 140
 141 lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*.
 142 // qed.
 143
 144 (*
 145 Comparing items and pres
 146
 147 Items and pres are very concrete datatypes: they can be effectively compared,
 148 and enumerated. In particular, we can define a boolean equality beqitem and a proof
 149 beqitem_true that it refects propositional equality, enriching the set (pitem S)
 150 to a DeqSet. *)
 151
 152 let rec beqitem S (i1,i2: pitem S) on i1 ≝
 153   match i1 with
 154   [ pz ⇒ match i2 with [ pz ⇒ true | _ ⇒ false]
 155   | pe ⇒ match i2 with [ pe ⇒ true | _ ⇒ false]
 156   | ps y1 ⇒ match i2 with [ ps y2 ⇒ y1==y2 | _ ⇒ false]
 157   | pp y1 ⇒ match i2 with [ pp y2 ⇒ y1==y2 | _ ⇒ false]
 158   | po i11 i12 ⇒ match i2 with
 159     [ po i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
 160     | _ ⇒ false]
 161   | pc i11 i12 ⇒ match i2 with
 162     [ pc i21 i22 ⇒ beqitem S i11 i21 ∧ beqitem S i12 i22
 163     | _ ⇒ false]
 164   | pk i11 ⇒ match i2 with [ pk i21 ⇒ beqitem S i11 i21 | _ ⇒ false]
 165   ].
 166
 167 lemma beqitem_true: ∀S,i1,i2. iff (beqitem S i1 i2 = true) (i1 = i2).
 168 #S #i1 elim i1
 169   [#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
 170   |#i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % // normalize #H destruct
 171   |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
 172     [>(\P H) // | @(\b (refl …))]
 173   |#x #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] % normalize #H destruct
 174     [>(\P H) // | @(\b (refl …))]
 175   |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
 176    normalize #H destruct
 177     [cases (true_or_false (beqitem S i11 i21)) #H1
 178       [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
 179       |>H1 in H; normalize #abs @False_ind /2/
 180       ]
 181     |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
 182     ]
 183   |#i11 #i12 #Hind1 #Hind2 #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i3] %
 184    normalize #H destruct
 185     [cases (true_or_false (beqitem S i11 i21)) #H1
 186       [>(proj1 … (Hind1 i21) H1) >(proj1 … (Hind2 i22)) // >H1 in H; #H @H
 187       |>H1 in H; normalize #abs @False_ind /2/
 188       ]
 189     |>(proj2 … (Hind1 i21) (refl …)) >(proj2 … (Hind2 i22) (refl …)) //
 190     ]
 191   |#i3 #Hind #i2 cases i2 [||#a|#a|#i21 #i22| #i21 #i22|#i4] %
 192    normalize #H destruct
 193     [>(proj1 … (Hind i4) H) // |>(proj2 … (Hind i4) (refl …)) //]
 194   ]
 195 qed.
 196
 197 definition DeqItem ≝ λS.
 198   mk_DeqSet (pitem S) (beqitem S) (beqitem_true S).
 199
 200 (* We also add a couple of unification hints to allow the type inference system
 201 to look at (pitem S) as the carrier of a DeqSet, and at beqitem as if it was the
 202 equality function of a DeqSet. *)
 203
 204 unification hint  0 ≔ S;
 205     X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
 206 (* ---------------------------------------- *) ⊢
 207     pitem S ≡ carr X.
 208
 209 unification hint  0 ≔ S,i1,i2;
 210     X ≟ mk_DeqSet (pitem S) (beqitem S) (beqitem_true S)
 211 (* ---------------------------------------- *) ⊢
 212     beqitem S i1 i2 ≡ eqb X i1 i2.
 213
 214 (*
 215 Semantics of pointed regular expressions
 216
 217 The intuitive semantic of a point is to mark the position where
 218 we should start reading the regular expression. The language associated
 219 to a pre is the union of the languages associated with its points. *)
 220
 221 let rec in_pl (S : DeqSet) (r : pitem S) on r : word S → Prop ≝
 222 match r with
 223 [ pz ⇒ ∅
 224 | pe ⇒ ∅
 225 | ps _ ⇒ ∅
 226 | pp x ⇒ { [x] }
 227 | pc r1 r2 ⇒ (in_pl ? r1) · \sem{forget ? r2} ∪ (in_pl ? r2)
 228 | po r1 r2 ⇒ (in_pl ? r1) ∪ (in_pl ? r2)
 229 | pk r1 ⇒ (in_pl ? r1) · \sem{forget ? r1}^*  ].
 230
 231 interpretation "in_pl" 'in_l E = (in_pl ? E).
 232 interpretation "in_pl mem" 'mem w l = (in_pl ? l w).
 233
 234 definition in_prl ≝ λS : DeqSet.λp:pre S.
 235   if (\snd p) then \sem{\fst p} ∪ {ϵ} else \sem{\fst p}.
 236
 237 interpretation "in_prl mem" 'mem w l = (in_prl ? l w).
 238 interpretation "in_prl" 'in_l E = (in_prl ? E).
 239
 240 (* The following, trivial lemmas are only meant for rewriting purposes. *)
 241
 242 lemma sem_pre_true : ∀S.∀i:pitem S.
 243   \sem{〈i,true〉} = \sem{i} ∪ {ϵ}.
 244 // qed.
 245
 246 lemma sem_pre_false : ∀S.∀i:pitem S.
 247   \sem{〈i,false〉} = \sem{i}.
 248 // qed.
 249
 250 lemma sem_cat: ∀S.∀i1,i2:pitem S.
 251   \sem{i1 · i2} = \sem{i1} · \sem{|i2|} ∪ \sem{i2}.
 252 // qed.
 253
 254 lemma sem_cat_w: ∀S.∀i1,i2:pitem S.∀w.
 255   \sem{i1 · i2} w = ((\sem{i1} · \sem{|i2|}) w ∨ \sem{i2} w).
 256 // qed.
 257
 258 lemma sem_plus: ∀S.∀i1,i2:pitem S.
 259   \sem{i1 + i2} = \sem{i1} ∪ \sem{i2}.
 260 // qed.
 261
 262 lemma sem_plus_w: ∀S.∀i1,i2:pitem S.∀w.
 263   \sem{i1 + i2} w = (\sem{i1} w ∨ \sem{i2} w).
 264 // qed.
 265
 266 lemma sem_star : ∀S.∀i:pitem S.
 267   \sem{i^*} = \sem{i} · \sem{|i|}^*.
 268 // qed.
 269
 270 lemma sem_star_w : ∀S.∀i:pitem S.∀w.
 271   \sem{i^*} w = (∃w1,w2.w1 @ w2 = w ∧ \sem{i} w1 ∧ \sem{|i|}^* w2).
 272 // qed.
 273
 274 (* Below are a few, simple, semantic properties of items. In particular:
 275 - not_epsilon_item : ∀S:DeqSet.∀i:pitem S. ¬ (\sem{i} ϵ).
 276 - epsilon_pre : ∀S.∀e:pre S. (\sem{i} ϵ) ↔ (\snd e = true).
 277 - minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
 278 - minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
 279 The first property is proved by a simple induction on $i$; the other
 280 results are easy corollaries. We need an auxiliary lemma first. *)
 281
 282 lemma append_eq_nil : ∀S.∀w1,w2:word S. w1 @ w2 = ϵ → w1 = ϵ.
 283 #S #w1 #w2 cases w1 // #a #tl normalize #H destruct qed.
 284
 285 lemma not_epsilon_lp : ∀S:DeqSet.∀e:pitem S. ¬ (ϵ ∈ e).
 286 #S #e elim e normalize /2/
 287   [#r1 #r2 * #n1 #n2 % * /2/ * #w1 * #w2 * * #H
 288    >(append_eq_nil …H…) /2/
 289   |#r1 #r2 #n1 #n2 % * /2/
 290   |#r #n % * #w1 * #w2 * * #H >(append_eq_nil …H…) /2/
 291   ]
 292 qed.
 293
 294 lemma epsilon_to_true : ∀S.∀e:pre S. ϵ ∈ e → \snd e = true.
 295 #S * #i #b cases b // normalize #H @False_ind /2/
 296 qed.
 297
 298 lemma true_to_epsilon : ∀S.∀e:pre S. \snd e = true → ϵ ∈ e.
 299 #S * #i #b #btrue normalize in btrue; >btrue %2 //
 300 qed.
 301
 302 lemma minus_eps_item: ∀S.∀i:pitem S. \sem{i} ≐ \sem{i}-{[ ]}.
 303 #S #i #w %
 304   [#H whd % // normalize @(not_to_not … (not_epsilon_lp …i)) //
 305   |* //
 306   ]
 307 qed.
 308
 309 lemma minus_eps_pre: ∀S.∀e:pre S. \sem{\fst e} ≐ \sem{e}-{[ ]}.
 310 #S * #i *
 311   [>sem_pre_true normalize in ⊢ (??%?); #w %
 312     [/3/ | * * // #H1 #H2 @False_ind @(absurd …H1 H2)]
 313   |>sem_pre_false normalize in ⊢ (??%?); #w % [ /3/ | * // ]
 314   ]
 315 qed.
 316
 317 (*
 318 Broadcasting points
 319
 320 Intuitively, a regular expression e must be understood as a pointed expression with a single
 321 point in front of it. Since however we only allow points before symbols, we must broadcast
 322 this initial point inside e traversing all nullable subexpressions, that essentially corresponds
 323 to the ϵ-closure operation on automata. We use the notation •(_) to denote such an operation;
 324 its definition is the expected one: let us start discussing an example.
 325
 326 Example
 327 Let us broadcast a point inside (a + ϵ)(b*a + b)b. We start working in parallel on the
 328 first occurrence of a (where the point stops), and on ϵ that gets traversed. We have hence
 329 reached the end of a + ϵ and we must pursue broadcasting inside (b*a + b)b. Again, we work in
 330 parallel on the two additive subterms b^*a and b; the first point is allowed to both enter the
 331 star, and to traverse it, stopping in front of a; the second point just stops in front of b.
 332 No point reached that end of b^*a + b hence no further propagation is possible. In conclusion:
 333                •((a + ϵ)(b^*a + b)b) = 〈(•a + ϵ)((•b)^*•a + •b)b, false〉
 334 *)
 335
 336 (* Broadcasting a point inside an item generates a pre, since the point could possibly reach
 337 the end of the expression.
 338 Broadcasting inside a i1+i2 amounts to broadcast in parallel inside i1 and i2.
 339 If we define
 340                  〈i1,b1〉 ⊕ 〈i2,b2〉 = 〈i1 + i2, b1 ∨ b2〉
 341 then, we just have •(i1+i2) = •(i1)⊕ •(i2).
 342 *)
 343
 344 definition lo ≝ λS:DeqSet.λa,b:pre S.〈\fst a + \fst b,\snd a ∨ \snd b〉.
 345 notation "a ⊕ b" left associative with precedence 65 for @{'oplus $a $b}.
 346 interpretation "oplus" 'oplus a b = (lo ? a b).
 347
 348 lemma lo_def: ∀S.∀i1,i2:pitem S.∀b1,b2. 〈i1,b1〉⊕〈i2,b2〉=〈i1+i2,b1∨b2〉.
 349 // qed.
 350
 351 (*
 352 Concatenation is a bit more complex. In order to broadcast a point inside i1 · i2
 353 we should start broadcasting it inside i1 and then proceed into i2 if and only if a
 354 point reached the end of i1. This suggests to define •(i1 · i2) as •(i1) ▹ i2, where
 355 e ▹ i is a general operation of concatenation between a pre and an item, defined by
 356 cases on the boolean in e:
 357
 358        〈i1,true〉 ▹ i2  = i1 ◃ •(i_2)
 359        〈i1,false〉 ▹ i2 = i1 · i2
 360
 361 In turn, ◃ says how to concatenate an item with a pre, that is however extremely simple:
 362
 363         i1 ◃ 〈i1,b〉  = 〈i_1 · i2, b〉
 364
 365 Let us come to the formalized definitions:
 366 *)
 367
 368 definition pre_concat_r ≝ λS:DeqSet.λi:pitem S.λe:pre S.
 369   match e with [ mk_Prod i1 b ⇒ 〈i · i1, b〉].
 370
 371 notation "i ◃ e" left associative with precedence 65 for @{'lhd $i $e}.
 372 interpretation "pre_concat_r" 'lhd i e = (pre_concat_r ? i e).
 373
 374 (* The behaviour of ◃ is summarized by the following, easy lemma: *)
 375
 376 lemma eq_to_ex_eq: ∀S.∀A,B:word S → Prop.
 377   A = B → A ≐ B.
 378 #S #A #B #H >H /2/ qed.
 379
 380 lemma sem_pre_concat_r : ∀S,i.∀e:pre S.
 381   \sem{i ◃ e} ≐ \sem{i} · \sem{|\fst e|} ∪ \sem{e}.
 382 #S #i * #i1 #b1 cases b1 [2: @eq_to_ex_eq //]
 383 >sem_pre_true >sem_cat >sem_pre_true /2/
 384 qed.
 385
 386 (* The definition of $•(-)$ (eclose) and ▹ (pre_concat_l) are mutually recursive.
 387 In this situation, a viable alternative that is usually simpler to reason about,
 388 is to abstract one of the two functions with respect to the other. In particular
 389 we abstract pre_concat_l with respect to an input bcast function from items to
 390 pres. *)
 391
 392 definition pre_concat_l ≝ λS:DeqSet.λbcast:∀S:DeqSet.pitem S → pre S.λe1:pre S.λi2:pitem S.
 393   match e1 with
 394   [ mk_Prod i1 b1 ⇒ match b1 with
 395     [ true ⇒ (i1 ◃ (bcast ? i2))
 396     | false ⇒ 〈i1 · i2,false〉
 397     ]
 398   ].
 399
 400 notation "a ▹ b" left associative with precedence 65 for @{'tril eclose $a $b}.
 401 interpretation "item-pre concat" 'tril op a b = (pre_concat_l ? op a b).
 402
 403 notation "•" non associative with precedence 65 for @{eclose ?}.
 404
 405 (* We are ready to give the formal definition of the broadcasting operation. *)
 406
 407 let rec eclose (S: DeqSet) (i: pitem S) on i : pre S ≝
 408  match i with
 409   [ pz ⇒ 〈 `∅, false 〉
 410   | pe ⇒ 〈 ϵ,  true 〉
 411   | ps x ⇒ 〈 `.x, false〉
 412   | pp x ⇒ 〈 `.x, false 〉
 413   | po i1 i2 ⇒ •i1 ⊕ •i2
 414   | pc i1 i2 ⇒ •i1 ▹ i2
 415   | pk i ⇒ 〈(\fst (•i))^*,true〉].
 416
 417 notation "• x" non associative with precedence 65 for @{'eclose $x}.
 418 interpretation "eclose" 'eclose x = (eclose ? x).
 419
 420 (* Here are a few simple properties of ▹ and •(-) *)
 421
 422 lemma eclose_plus: ∀S:DeqSet.∀i1,i2:pitem S.
 423   •(i1 + i2) = •i1 ⊕ •i2.
 424 // qed.
 425
 426 lemma eclose_dot: ∀S:DeqSet.∀i1,i2:pitem S.
 427   •(i1 · i2) = •i1 ▹ i2.
 428 // qed.
 429
 430 lemma eclose_star: ∀S:DeqSet.∀i:pitem S.
 431   •i^* = 〈(\fst(•i))^*,true〉.
 432 // qed.
 433
 434 lemma odot_true :
 435   ∀S.∀i1,i2:pitem S.
 436   〈i1,true〉 ▹ i2 = i1 ◃ (•i2).
 437 // qed.
 438
 439 lemma odot_true_bis :
 440   ∀S.∀i1,i2:pitem S.
 441   〈i1,true〉 ▹ i2 = 〈i1 · \fst (•i2), \snd (•i2)〉.
 442 #S #i1 #i2 normalize cases (•i2) // qed.
 443
 444 lemma odot_false:
 445   ∀S.∀i1,i2:pitem S.
 446   〈i1,false〉 ▹ i2 = 〈i1 · i2, false〉.
 447 // qed.
 448
 449 (* The definition of •(-) (eclose) can then be lifted from items to pres
 450 in the obvious way. *)
 451
 452 definition lift ≝ λS.λf:pitem S →pre S.λe:pre S.
 453   match e with
 454   [ mk_Prod i b ⇒ 〈\fst (f i), \snd (f i) ∨ b〉].
 455
 456 definition preclose ≝ λS. lift S (eclose S).
 457 interpretation "preclose" 'eclose x = (preclose ? x).
 458
 459 (* Obviously, broadcasting does not change the carrier of the item,
 460 as it is easily proved by structural induction. *)
 461
 462 lemma erase_bull : ∀S.∀i:pitem S. |\fst (•i)| = |i|.
 463 #S #i elim i //
 464   [ #i1 #i2 #IH1 #IH2 >erase_dot <IH1 >eclose_dot
 465     cases (•i1) #i11 #b1 cases b1 // <IH2 >odot_true_bis //
 466   | #i1 #i2 #IH1 #IH2 >eclose_plus >(erase_plus … i1) <IH1 <IH2
 467     cases (•i1) #i11 #b1 cases (•i2) #i21 #b2 //
 468   | #i #IH >eclose_star >(erase_star … i) <IH cases (•i) //
 469   ]
 470 qed.
 471
 472 (* We are now ready to state the main semantic properties of ⊕, ◃ and •(-):
 473
 474 sem_oplus:     \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}
 475 sem_pcl:       \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}
 476 sem_bullet     \sem{•i} ≐ \sem{i} ∪ \sem{|i|}
 477
 478 The proof of sem_oplus is straightforward. *)
 479
 480 lemma sem_oplus: ∀S:DeqSet.∀e1,e2:pre S.
 481   \sem{e1 ⊕ e2} ≐ \sem{e1} ∪ \sem{e2}.
 482 #S * #i1 #b1 * #i2 #b2 #w %
 483   [cases b1 cases b2 normalize /2/ * /3/ * /3/
 484   |cases b1 cases b2 normalize /2/ * /3/ * /3/
 485   ]
 486 qed.
 487
 488 (* For the others, we proceed as follow: we first prove the following
 489 auxiliary lemma, that assumes sem_bullet:
 490
 491 sem_pcl_aux:
 492    \sem{•i2} ≐  \sem{i2} ∪ \sem{|i2|} →
 493    \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
 494
 495 Then, using the previous result, we prove sem_bullet by induction
 496 on i. Finally, sem_pcl_aux and sem_bullet give sem_pcl. *)
 497
 498 lemma LcatE : ∀S.∀e1,e2:pitem S.
 499   \sem{e1 · e2} = \sem{e1} · \sem{|e2|} ∪ \sem{e2}.
 500 // qed.
 501
 502 lemma odot_dot_aux : ∀S.∀e1:pre S.∀i2:pitem S.
 503    \sem{•i2} ≐  \sem{i2} ∪ \sem{|i2|} →
 504    \sem{e1 ▹ i2} ≐  \sem{e1} · \sem{|i2|} ∪ \sem{i2}.
 505 #S * #i1 #b1 #i2 cases b1
 506   [2:#th >odot_false >sem_pre_false >sem_pre_false >sem_cat /2/
 507   |#H >odot_true >sem_pre_true @(eqP_trans … (sem_pre_concat_r …))
 508    >erase_bull @eqP_trans [|@(eqP_union_l … H)]
 509     @eqP_trans [|@eqP_union_l[|@union_comm ]]
 510     @eqP_trans [|@eqP_sym @union_assoc ] /3/
 511   ]
 512 qed.
 513
 514 lemma minus_eps_pre_aux: ∀S.∀e:pre S.∀i:pitem S.∀A.
 515  \sem{e} ≐ \sem{i} ∪ A → \sem{\fst e} ≐ \sem{i} ∪ (A - {[ ]}).
 516 #S #e #i #A #seme
 517 @eqP_trans [|@minus_eps_pre]
 518 @eqP_trans [||@eqP_union_r [|@eqP_sym @minus_eps_item]]
 519 @eqP_trans [||@distribute_substract]
 520 @eqP_substract_r //
 521 qed.
 522
 523 theorem sem_bull: ∀S:DeqSet. ∀i:pitem S.  \sem{•i} ≐ \sem{i} ∪ \sem{|i|}.
 524 #S #e elim e
 525   [#w normalize % [/2/ | * //]
 526   |/2/
 527   |#x normalize #w % [ /2/ | * [@False_ind | //]]
 528   |#x normalize #w % [ /2/ | * // ]
 529   |#i1 #i2 #IH1 #IH2
 530    (* lhs = \sem{•(i1 ·i2)} *)
 531    >eclose_dot
 532    (* lhs =\sem{•(i1) ▹ i2)} *)
 533    @eqP_trans [|@odot_dot_aux //]
 534    (* lhs = \sem{•(i1)·\sem{|i2|}∪\sem{i2} *)
 535    @eqP_trans
 536      [|@eqP_union_r
 537        [|@eqP_trans [|@(cat_ext_l … IH1)] @distr_cat_r]]
 538    (* lhs = \sem{i1}·\sem{|i2|}∪\sem{|i1|}·\sem{|i2|}∪\sem{i2} *)
 539    @eqP_trans [|@union_assoc]
 540    (* lhs = \sem{i1}·\sem{|i2|}∪(\sem{|i1|}·\sem{|i2|}∪\sem{i2}) *)
 541    (* Now we work on the rhs that is
 542       rhs = \sem{i1·i2} ∪ \sem{|i1·i2|} *)
 543    >sem_cat
 544    (* rhs = \sem{i1}·\sem{|i2|} ∪ \sem{i2} ∪ \sem{|i1·i2|} *)
 545    @eqP_trans [||@eqP_sym @union_assoc]
 546    (* rhs = \sem{i1}·\sem{|i2|}∪ (\sem{i2} ∪ \sem{|i1·i2|}) *)
 547    @eqP_union_l @union_comm
 548   |#i1 #i2 #IH1 #IH2 >eclose_plus
 549    @eqP_trans [|@sem_oplus] >sem_plus >erase_plus
 550    @eqP_trans [|@(eqP_union_l … IH2)]
 551    @eqP_trans [|@eqP_sym @union_assoc]
 552    @eqP_trans [||@union_assoc] @eqP_union_r
 553    @eqP_trans [||@eqP_sym @union_assoc]
 554    @eqP_trans [||@eqP_union_l [|@union_comm]]
 555    @eqP_trans [||@union_assoc] /2/
 556   |#i #H >sem_pre_true >sem_star >erase_bull >sem_star
 557    @eqP_trans [|@eqP_union_r [|@cat_ext_l [|@minus_eps_pre_aux //]]]
 558    @eqP_trans [|@eqP_union_r [|@distr_cat_r]]
 559    @eqP_trans [|@union_assoc] @eqP_union_l >erase_star
 560    @eqP_sym @star_fix_eps
 561   ]
 562 qed.
 563
 564 (*
 565 Blank item
 566
 567 As a corollary of theorem sem_bullet, given a regular expression e, we can easily
 568 find an item with the same semantics of $e$: it is enough to get an item (blank e)
 569 having e as carrier and no point, and then broadcast a point in it. The semantics of
 570 (blank e) is obviously the empty language: from the point of view of the automaton,
 571 it corresponds with the pit state. *)
 572
 573 let rec blank (S: DeqSet) (i: re S) on i :pitem S ≝
 574  match i with
 575   [ z ⇒ `∅
 576   | e ⇒ ϵ
 577   | s y ⇒ `y
 578   | o e1 e2 ⇒ (blank S e1) + (blank S e2)
 579   | c e1 e2 ⇒ (blank S e1) · (blank S e2)
 580   | k e ⇒ (blank S e)^* ].
 581
 582 lemma forget_blank: ∀S.∀e:re S.|blank S e| = e.
 583 #S #e elim e normalize //
 584 qed.
 585
 586 lemma sem_blank: ∀S.∀e:re S.\sem{blank S e} ≐ ∅.
 587 #S #e elim e
 588   [1,2:@eq_to_ex_eq //
 589   |#s @eq_to_ex_eq //
 590   |#e1 #e2 #Hind1 #Hind2 >sem_cat
 591    @eqP_trans [||@(union_empty_r … ∅)]
 592    @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r
 593    @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind1
 594   |#e1 #e2 #Hind1 #Hind2 >sem_plus
 595    @eqP_trans [||@(union_empty_r … ∅)]
 596    @eqP_trans [|@eqP_union_l[|@Hind2]] @eqP_union_r @Hind1
 597   |#e #Hind >sem_star
 598    @eqP_trans [||@(cat_empty_l … ?)] @cat_ext_l @Hind
 599   ]
 600 qed.
 601
 602 theorem re_embedding: ∀S.∀e:re S.
 603   \sem{•(blank S e)} ≐ \sem{e}.
 604 #S #e @eqP_trans [|@sem_bull] >forget_blank
 605 @eqP_trans [|@eqP_union_r [|@sem_blank]]
 606 @eqP_trans [|@union_comm] @union_empty_r.
 607 qed.
 608
 609 (*
 610 Lifted Operators
 611
 612 Plus and bullet have been already lifted from items to pres. We can now
 613 do a similar job for concatenation ⊙ and Kleene's star ⊛. *)
 614
 615 definition lifted_cat ≝ λS:DeqSet.λe:pre S.
 616   lift S (pre_concat_l S eclose e).
 617
 618 notation "e1 ⊙ e2" left associative with precedence 70 for @{'odot $e1 $e2}.
 619
 620 interpretation "lifted cat" 'odot e1 e2 = (lifted_cat ? e1 e2).
 621
 622 lemma odot_true_b : ∀S.∀i1,i2:pitem S.∀b.
 623   〈i1,true〉 ⊙ 〈i2,b〉 = 〈i1 · (\fst (•i2)),\snd (•i2) ∨ b〉.
 624 #S #i1 #i2 #b normalize in ⊢ (??%?); cases (•i2) //
 625 qed.
 626
 627 lemma odot_false_b : ∀S.∀i1,i2:pitem S.∀b.
 628   〈i1,false〉 ⊙ 〈i2,b〉 = 〈i1 · i2 ,b〉.
 629 //
 630 qed.
 631
 632 lemma erase_odot:∀S.∀e1,e2:pre S.
 633   |\fst (e1 ⊙ e2)| = |\fst e1| · (|\fst e2|).
 634 #S * #i1 * * #i2 #b2 // >odot_true_b //
 635 qed.
 636
 637 (* Let us come to the star operation: *)
 638
 639 definition lk ≝ λS:DeqSet.λe:pre S.
 640   match e with
 641   [ mk_Prod i1 b1 ⇒
 642     match b1 with
 643     [true ⇒ 〈(\fst (eclose ? i1))^*, true〉
 644     |false ⇒ 〈i1^*,false〉
 645     ]
 646   ].
 647
 648 (* notation < "a \sup ⊛" non associative with precedence 90 for @{'lk $a}.*)
 649 interpretation "lk" 'lk a = (lk ? a).
 650 notation "a^⊛" non associative with precedence 90 for @{'lk $a}.
 651
 652
 653 lemma ostar_true: ∀S.∀i:pitem S.
 654   〈i,true〉^⊛ = 〈(\fst (•i))^*, true〉.
 655 // qed.
 656
 657 lemma ostar_false: ∀S.∀i:pitem S.
 658   〈i,false〉^⊛ = 〈i^*, false〉.
 659 // qed.
 660
 661 lemma erase_ostar: ∀S.∀e:pre S.
 662   |\fst (e^⊛)| = |\fst e|^*.
 663 #S * #i * // qed.
 664
 665 lemma sem_odot_true: ∀S:DeqSet.∀e1:pre S.∀i.
 666   \sem{e1 ⊙ 〈i,true〉} ≐ \sem{e1 ▹ i} ∪ { [ ] }.
 667 #S #e1 #i
 668 cut (e1 ⊙ 〈i,true〉 = 〈\fst (e1 ▹ i), \snd(e1 ▹ i) ∨ true〉) [//]
 669 #H >H cases (e1 ▹ i) #i1 #b1 cases b1
 670   [>sem_pre_true @eqP_trans [||@eqP_sym @union_assoc]
 671    @eqP_union_l /2/
 672   |/2/
 673   ]
 674 qed.
 675
 676 lemma eq_odot_false: ∀S:DeqSet.∀e1:pre S.∀i.
 677   e1 ⊙ 〈i,false〉 = e1 ▹ i.
 678 #S #e1 #i
 679 cut (e1 ⊙ 〈i,false〉 = 〈\fst (e1 ▹ i), \snd(e1 ▹ i) ∨ false〉) [//]
 680 cases (e1 ▹ i) #i1 #b1 cases b1 #H @H
 681 qed.
 682
 683 (* We conclude this section with the proof of the main semantic properties
 684 of ⊙ and ⊛. *)
 685
 686 lemma sem_odot:
 687   ∀S.∀e1,e2: pre S. \sem{e1 ⊙ e2} ≐ \sem{e1}· \sem{|\fst e2|} ∪ \sem{e2}.
 688 #S #e1 * #i2 *
 689   [>sem_pre_true
 690    @eqP_trans [|@sem_odot_true]
 691    @eqP_trans [||@union_assoc] @eqP_union_r @odot_dot_aux //
 692   |>sem_pre_false >eq_odot_false @odot_dot_aux //
 693   ]
 694 qed.
 695
 696 theorem sem_ostar: ∀S.∀e:pre S.
 697   \sem{e^⊛} ≐  \sem{e} · \sem{|\fst e|}^*.
 698 #S * #i #b cases b
 699   [(* lhs = \sem{〈i,true〉^⊛} *)
 700    >sem_pre_true (* >sem_pre_true *)
 701    (* lhs = \sem{(\fst (•i))^*}∪{ϵ} *)
 702    >sem_star >erase_bull
 703    (* lhs = \sem{\fst (•i)}·(\sem{|i|)^*∪{ϵ} *)
 704    @eqP_trans [|@eqP_union_r[|@cat_ext_l [|@minus_eps_pre_aux //]]]
 705    (* lhs = (\sem{i}∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
 706    @eqP_trans [|@eqP_union_r [|@distr_cat_r]]
 707    (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ} *)
 708    @eqP_trans [|@union_assoc]
 709    (* lhs = (\sem{i}·(\sem{|i|)^*∪((\sem{|i|}-{ϵ})·(\sem{|i|)^*∪{ϵ}) *)
 710    @eqP_trans [|@eqP_union_l[|@eqP_sym @star_fix_eps]]
 711    (* lhs = (\sem{i}·(\sem{|i|)^*∪(\sem{|i|)^* *)
 712    (* now we work on the right hand side, that is
 713       rhs = \sem{〈i,true〉}·(\sem{|i|}^* *)
 714    @eqP_trans [||@eqP_sym @distr_cat_r]
 715    (* rhs = (\sem{i}·(\sem{|i|)^*∪{ϵ}·(\sem{|i|)^* *)
 716    @eqP_union_l @eqP_sym @epsilon_cat_l
 717   |>sem_pre_false >sem_pre_false >sem_star /2/
 718   ]
 719 qed.
 720