+(*
+Pointed Regular expressions
+
+We now introduce pointed regular expressions, that are the main tool we shall
+use for the construction of the automaton.
+A pointed regular expression is just a regular expression internally labelled
+with some additional points. Intuitively, points mark the positions inside the
+regular expression which have been reached after reading some prefix of
+the input string, or better the positions where the processing of the remaining
+string has to be started. Each pointed expression for $e$ represents a state of
+the {\em deterministic} automaton associated with $e$; since we obviously have
+only a finite number of possible labellings, the number of states of the automaton
+is finite.
+
+Pointed regular expressions provide the tool for an algebraic revisitation of
+McNaughton and Yamada's algorithm for position automata, making the proof of its
+correctness, that is far from trivial, particularly clear and simple. In particular,
+pointed expressions offer an appealing alternative to Brzozowski's derivatives,
+avoiding their weakest point, namely the fact of being forced to quotient derivatives
+w.r.t. a suitable notion of equivalence in order to get a finite number of states
+(that is not essential for recognizing strings, but is crucial for comparing regular
+expressions).
+
+Our main data structure is the notion of pointed item, that is meant whose purpose
+is to encode a set of positions inside a regular expression.
+The idea of formalizing pointers inside a data type by means of a labelled version
+of the data type itself is probably one of the first, major lessons learned in the
+formalization of the metatheory of programming languages. For our purposes, it is
+enough to mark positions preceding individual characters, so we shall have two kinds
+of characters •a (pp a) and a (ps a) according to the case a is pointed or not. *)
+
+inductive pitem (S: DeqSet) : Type[0] ≝
+ pz: pitem S (* empty *)
+ | pe: pitem S (* epsilon *)
+ | ps: S → pitem S (* symbol *)
+ | pp: S → pitem S (* pointed sysmbol *)
+ | pc: pitem S → pitem S → pitem S (* concatenation *)
+ | po: pitem S → pitem S → pitem S (* plus *)
+ | pk: pitem S → pitem S. (* kleene's star *)
+
+(* A pointed regular expression (pre) is just a pointed item with an additional
+boolean, that must be understood as the possibility to have a trailing point at
+the end of the expression. As we shall see, pointed regular expressions can be
+understood as states of a DFA, and the boolean indicates if
+the state is final or not. *)
+
+definition pre ≝ λS.pitem S × bool.
+
+interpretation "pitem star" 'star a = (pk ? a).
+interpretation "pitem or" 'plus a b = (po ? a b).
+interpretation "pitem cat" 'middot a b = (pc ? a b).
+notation < ".a" non associative with precedence 90 for @{ 'pp $a}.
+notation > "`. term 90 a" non associative with precedence 90 for @{ 'pp $a}.
+interpretation "pitem pp" 'pp a = (pp ? a).
+interpretation "pitem ps" 'ps a = (ps ? a).
+interpretation "pitem epsilon" 'epsilon = (pe ?).
+interpretation "pitem empty" 'empty = (pz ?).
+
+(* The carrier $|i|$ of an item i is the regular expression obtained from i by
+removing all the points. Similarly, the carrier of a pointed regular expression
+is the carrier of its item. *)
+
+let rec forget (S: DeqSet) (l : pitem S) on l: re S ≝
+ match l with
+ [ pz ⇒ z ? (* `∅ *)
+ | pe ⇒ ϵ
+ | ps x ⇒ `x
+ | pp x ⇒ `x
+ | pc E1 E2 ⇒ (forget ? E1) · (forget ? E2)
+ | po E1 E2 ⇒ (forget ? E1) + (forget ? E2)
+ | pk E ⇒ (forget ? E)^* ].
+
+(* Already in the library
+notation "| term 19 C |" with precedence 70 for @{ 'card $C }.
+*)
+interpretation "forget" 'card a = (forget ? a).