-(*
-Pointed Regular expressions
-
-We now introduce pointed regular expressions, that are the main tool we shall
-use for the construction of the automaton.
-A pointed regular expression is just a regular expression internally labelled
-with some additional points. Intuitively, points mark the positions inside the
-regular expression which have been reached after reading some prefix of
-the input string, or better the positions where the processing of the remaining
-string has to be started. Each pointed expression for $e$ represents a state of
-the {\em deterministic} automaton associated with $e$; since we obviously have
-only a finite number of possible labellings, the number of states of the automaton
-is finite.
-
-Pointed regular expressions provide the tool for an algebraic revisitation of
-McNaughton and Yamada's algorithm for position automata, making the proof of its
-correctness, that is far from trivial, particularly clear and simple. In particular,
-pointed expressions offer an appealing alternative to Brzozowski's derivatives,
-avoiding their weakest point, namely the fact of being forced to quotient derivatives
-w.r.t. a suitable notion of equivalence in order to get a finite number of states
-(that is not essential for recognizing strings, but is crucial for comparing regular
-expressions).
-
-Our main data structure is the notion of pointed item, that is meant whose purpose
-is to encode a set of positions inside a regular expression.
-The idea of formalizing pointers inside a data type by means of a labelled version
-of the data type itself is probably one of the first, major lessons learned in the
-formalization of the metatheory of programming languages. For our purposes, it is
-enough to mark positions preceding individual characters, so we shall have two kinds
-of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)
-
-inductive pitem (S: DeqSet) : Type[0] ≝
- pz: pitem S (* empty *)
- | pe: pitem S (* epsilon *)
- | ps: S → pitem S (* symbol *)
- | pp: S → pitem S (* pointed sysmbol *)
- | pc: pitem S → pitem S → pitem S (* concatenation *)
- | po: pitem S → pitem S → pitem S (* plus *)
- | pk: pitem S → pitem S. (* kleene's star *)
-
-(* A pointed regular expression (pre) is just a pointed item with an additional
-boolean, that must be understood as the possibility to have a trailing point at
-the end of the expression. As we shall see, pointed regular expressions can be
-understood as states of a DFA, and the boolean indicates if
-the state is final or not. *)
-
-definition pre ≝ λS.pitem S × bool.
-
-interpretation "pitem star" 'star a = (pk ? a).
-interpretation "pitem or" 'plus a b = (po ? a b).
-interpretation "pitem cat" 'middot a b = (pc ? a b).
-notation < ".a" non associative with precedence 90 for @{ 'pp $a}.
-notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}.
-interpretation "pitem pp" 'pp a = (pp ? a).
-interpretation "pitem ps" 'ps a = (ps ? a).
-interpretation "pitem epsilon" 'epsilon = (pe ?).
-interpretation "pitem empty" 'empty = (pz ?).
-
-(* The carrier $|i|$ of an item i is the regular expression obtained from i by
-removing all the points. Similarly, the carrier of a pointed regular expression
-is the carrier of its item. *)
-
-let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
- match l with
- [ pz ⇒ z ? (* `∅ *)
- | pe ⇒ ϵ
- | ps x ⇒ `x
- | pp x ⇒ `x
- | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
- | po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
- | pk E ⇒ (forget ? E)^* ].
-
-interpretation "forget" 'card a = (forget ? a).
-
-lemma erase_dot : ∀S.∀e1,e2:pitem S. |e1 · e2| = c ? (|e1|) (|e2|).
-// qed.
-
-lemma erase_plus : ∀S.∀i1,i2:pitem S.
- |i1 + i2| = |i1| + |i2|.
-// qed.
-
-lemma erase_star : ∀S.∀i:pitem S.|i^*| = |i|^*.
-// qed.