The operational semantics of the core language is ready

[helm.git] / helm / mathql / doc / mathql_operational.tex
diff --git a/helm/mathql/doc/mathql_operational.tex b/helm/mathql/doc/mathql_operational.tex

new file mode 100644 (file)

index 0000000..a1cbe95
--- /dev/null
+++ b/helm/mathql/doc/mathql_operational.tex
@@ -0,0 +1,713 @@
+\section {Operational semantics}
+
+This section describes {\MathQL} semantics, that we present in a natural
+operational style \cite{Lan98,Win93}. 
+Here we use a simple type system that includes basic types such as strings and
+Booleans, and some type constructors such as product and exponentiation.
+$ y \oft Y $ will denote a typing judgement.
+Note that this semantics is not meant as a formal system \emph{per se}, but
+should serve as a reference for implementors.
+
+\subsection {Mathematical background}
+
+As a mathematical background for the semantics, we take the one presented in
+\cite{Gui03}.
+
+{\Str} denotes the type of strings and its elements are the finite sequences
+of Unicode \cite{Unicode} characters.
+Grammatical productions, represented as strings in angle brackets, denote the
+subtype of {\Str} containing the produced sequences of characters.
+
+{\Num} denotes the type of numbers and is the subtype of {\Str} defined by the
+regular expression: \TT{'0 - 9' [ '0 - 9' ]*}.
+In this type, numbers are represented by their decimal expansion.
+
+$ \Setof\ Y $ denotes the type of finite sets ({\ie} unordered finite
+sequences without repetitions) over $ Y $.
+$ \Listof\ Y $ denotes the type of lists ({\ie} ordered finite sequences)
+over $ Y $.
+We will use the notation $ [y_1, \cdots, y_m] $ for the list whose elements 
+are $ y_1, \cdots, y_m $.
+
+{\Boole}, the type of Boolean values, is defined as
+$ \{\ES, \{("", \ES)\}\} \oft \Setof\ \Setof\ (\Str \times \Setof\ Y) $ 
+where the first element stands for \emph{false} (denoted by {\F}) and the
+second element stands for \emph{true} (denoted by {\T}).
+
+$ Y \times Z $ denotes the product of the types $ Y $ and $ Z $ whose elements
+are the ordered pairs $ (y, z) $ such that $ y \oft Y $ and $ z \oft Z $.
+The notation is also extended to a ternary product.
+
+$ Y \to Z $ denotes the type of functions from $ Y $ to $ Z $ and $ f\ y $
+denotes the application of $ f \oft Y \to Z $ to $ y \oft Y $.
+Relations over types, such as equality, are seen as functions to {\Boole}.
+
+With the above constructors we can give a formal meaning to most of the
+standard notation. For instance we will use the following:  
+
+\begin{itemize}
+
+\item
+$ {\ES} \oft (\Setof\ Y) $ 
+
+\item
+$ {\lex} \oft ((\Setof\ Y) \to \Boole) \to \Boole $
+
+\item
+$ {\lall} \oft ((\Setof\ Y) \to \Boole) \to \Boole $
+
+\item
+$ {\in} \oft Y \to (\Setof\ Y) \to \Boole $ (infix) 
+
+\item
+$ {\sub} \oft (\Setof\ Y) \to (\Setof\ Y) \to \Boole $ (infix) 
+
+\item
+$ {\meet} \oft (\Setof\ Y) \to (\Setof\ Y) \to \Boole $ (infix)
+
+%\item
+$ {\sand} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ (infix)
+
+\item
+$ {\sor} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ (infix)
+
+\item
+$ {\sdor} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $
+(the disjoint union, infix)
+
+\item
+$ \le \oft \Num \to \Num \to \Boole $ (infix)
+
+\item
+$ < \oft \Num \to \Num \to \Boole $ (infix)
+
+\item
+$ \# \oft (\Setof\ Y) \to \Num $ (the size operator)
+
+\item
+$ \app \oft (\Listof\ Y) \to (\Listof\ Y) \to (\Listof\ Y) $
+(the concatenation, infix)
+
+\item
+$ \lnot \oft \Boole \to \Boole $
+
+\end{itemize}
+
+\noindent
+Note that $ \lall $ and $ \lex $ are always decidable because the sets are
+finite by definition.
+
+\noindent
+$ U \meet W $ means $ (\lex u \in U)\ u \in W $ and expresses the fact that
+$ U \sand W $ is inhabited as a primitive notion, {\ie} without mentioning
+intersection and equality as for $ U \sand W \neq \ES $, which is equivalent
+but may be implemented less efficiently in real cases%
+\footnote{As for the Boolean condition $ \a \lor \b $ which may have a more
+efficient implementation than $ \lnot(\lnot \a \land \lnot \b) $.}.
+
+$ U \meet W $ is a natural companion of $ U \sub W $ being its logical dual
+(recall that $ U \sub W $ means $ (\lall u \in U)\ u \in W $)
+and is already being used successfully in the context of a constructive
+({\ie} intuitionistic and predicative) approach to point-free topology
+\cite{Sam00}.
+
+Sets of couples play a central role in our formalization and in particular we
+will use:
+
+\begin{itemize}
+
+\item
+$ \Fst \oft (Y \times Z) \to Y $ such that $ \Fst\ (y, z) = y $.
+
+\item
+$ \Snd \oft (Y \times Z) \to Z $ such that $ \Snd\ (y, z) = z $.
+
+\item
+$ \Fsts \oft \Setof\ (Y \times Z) \to \Setof\ Z $ such that 
+$ \Fsts\ U = \{\Fst\ u \st u \in U\} $.
+
+\item 
+With the same notation, if $ W $ contains just one couple whose first
+component is $ y $, then $ \get{W}{y} $ is the second component of that couple.
+In the other cases $ \get{W}{y} $ is not defined.
+This operator has type $ (\Setof\ (Y \times Z)) \to Y \to Z $.  
+
+\item
+Moreover $ \set{W}{y}{z} $ is the set obtained from $ W $ removing every
+couple whose first component is $ y $ and adding the couple $ (y, z) $.
+The type of this operator is \\
+$ (\Setof\ (Y \times Z)) \to Y \to Z \to (\Setof\ (Y \times Z)) $.  
+
+\item 
+Also $ U + W $ is the union of two sets of couples in the following sense:
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{rll}
+%
+$ U + \ES $ & rewrites to & $ U $ \\
+$ U + (W \sdor \{(y, z)\}) $ & rewrites to & $ \set{U}{y}{z} + W $   
+%
+\end{tabular} \end{center}
+\end{footnotesize}
+
+\end{itemize}
+
+The last three operators are used to read, write and join association sets,
+which are sets of couples such that the first components of two different
+elements are always different.
+These sets will be exploited to formalize the memories appearing in evaluation
+contexts. 
+
+\subsection {The core language}
+
+\subsubsection {Preliminaries}
+
+Wih the above background we are able to type the main objects needed in the
+formalization:
+
+\begin{itemize}
+
+\item
+A path $ s $ is a list of strings therefore its type is
+$ T_{0a} = \Listof\ \Str $.
+
+\item
+A multiple string value $ V $ is an object of type $ T_{0b} = \Setof\ \Str $.
+
+\item
+A attribute group $ G $ is an association set connecting the attribute names
+to their values, therefore its type is
+$ T_1 = \Setof\ (T_{0a} \times T_{0b}) $. 
+
+\item
+A subject string $ r $ is an object of type $ \Str $.
+
+\item
+A set $ A $ of attribute groups is an object of type $ T_2 = \Setof\ T_1 $.  
+
+\item
+An {\av} is a subject string with its attribute groups, so its type is
+$ T_3 = \Str \times T_2 $. 
+
+\item
+A set $ S $ of {\av}'s is an association set of type $ T_4 = \Setof\ T_3 $.
+
+\item
+A triple of an attributed relation is of type
+$ T_5 = \Str \times \Str \times (T_{0a} \to \Str) $.
+
+\end{itemize}
+
+When a constant string appearing in a {\MathQL} expression is unquoted, the
+surrounding double quotes are deleted and each escaped sequence is translated
+according \figref{EscTS}.
+
+This operation is formally performed by the function
+$ \Unquote $ of type $ \Str \to \Str $.
+Moreover $ \Name \oft \GP{path} \to T_{0a} $ is a helper function that
+converts a linearized path in its structured representation.
+Formally $ \Name\ (\TT{/}q_1\TT{/} \cdots \TT{/}q_m) $ 
+rewrites to $ [\Unquote\ q_1, \cdots, \Unquote\ q_m] $.
+
+The semantics of {\MathQL} expressions denoting queries is given by the infix
+relation $ \daq $ that evaluates a query to an {\av} set.
+These expressions are evaluated in a context $ \G = \g $
+which is a triple of association sets that connect
+svar's to {\av} sets, avar's to {\av}'s and avar's to attribute groups.
+Therefore the type $ K $ of the context $ \G $ is:
+
+\begin{footnotesize} \begin{center}
+$ 
+\Setof\ (\GP{svar} \times T_4)  \times
+\Setof\ (\GP{avar} \times T_3)\ \times % $ \\ $ \times\
+\Setof\ (\GP{avar} \times T_1)
+$
+\end{center} \end{footnotesize}
+
+\noindent
+and the evaluating relation is of the following type:
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{l}
+$ \mathord{\daq} \oft (K \times \GP{query}) \times (K \times T_4) \to \Boole $.
+\end{tabular} \end{center}
+\end{footnotesize}
+
+The context components $ \G_s $ and $ \G_a $ are used to store the contents of
+variables, while $ \G_g $ is used by the \TT{ex} operator to be presented
+below.
+
+In the following we will write $ (\G, x) \daq S $ to abbreviate
+$ (\G, x) \daq (\G, S) $. 
+
+The semantics of {\MathQL} expressions denoting results is given by the infix
+relation $ \dar \oft \GP{avs} \times T_4 \to \Boole $ that evaluates a result
+to an {\av} set.
+
+\subsubsection{Queries}
+
+The first \GP{query} expressions include explicit {\av} sets and syntactic
+grouping:
+
+\begin{itemize}
+
+\item 
+The syntactic grouping is obtained enclosing a \GP{query} between \TT{(}
+and \TT{)}.    
+An explicit {\av} set can be represented by a single string, which is
+converted into a single {\av} with no attributes, or by a \GP{xavs}
+(extended {\av} set) expression enclosed between \TT{[} and \TT{]}.
+Such an expression describes all the components of an {\av} set and is
+evaluated using the rules given below.    
+
+\begin{footnotesize} 
+\begin{center} 
+%
+\irule{(\G, x) \daq S}{}{(\G, (x)) \daq S} \spc
+\irule{q \oft \GP{string}}{}{(\G, q) \daq \{(\Unquote\ q, \ES)\}}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{x_1, \cdots, x_m \in \GP{xav} \spc 
+       (\G, TT{[} x_1 \TT{]}) \daq S_1 \spc \cdots \spc (\G, \TT{[} x_m \TT{]}) \daq S_m}{}
+      {(\G, \TT{[} x_1 \TT{;} \cdots \TT{;} x_m \TT{]}) \daq S_1 \sum \cdots \sum S_m}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{q \in \GP{string} \spc g_1, \cdots, g_m \in \GP{xgroup} \spc
+       (\G, \TT{[} q\ \TT{attr}\ g_1 \TT{]}) \daq S_1 \spc \cdots \spc
+       (\G, \TT{[} q\ \TT{attr}\ g_m \TT{]}) \daq S_m}{}
+      {(\G, \TT{[} q\ \TT{attr}\ g_1 \TT{,} \cdots \TT{,} g_m \TT{]}) \daq S_1 \sum \cdots \sum S_m}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{q \in \GP{string} \spc a_1, \cdots, a_m \in \GP{xatr} \spc
+       (\G, \TT{[} q\ \TT{attr}\ \{ a_1 \} \TT{]}) \daq S_1 \spc \cdots \spc
+       (\G, \TT{[} q\ \TT{attr}\ \{ a_m \} \TT{]}) \daq S_m}{}
+      {(\G, \TT{[} q\ \TT{attr}\ \{ a_1 \TT{;} \cdots \TT{;} a_m \} \TT{]}) \daq S_1 \dsum \cdots \dsum S_m}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{q \in \GP{string} \spc p \in \GP{path} \spc x \daq S}{}
+      {(\G, \TT{[} q\ \TT{attr}\ \{ p = x \} \TT{]}) \daq 
+       \{(\Unquote\ q, \{ \{ (\Name\ p, \Fsts\ S) \} \})\}}
+%
+\end{center} 
+\end{footnotesize}
+
+$ \dsum $ and $ \sum $ are helper functions describing the two union operations
+on {\av} sets: with and without attribute distribution respectively.
+$ \dsum $ and $ \sum $ have two rewrite rules each.
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{lrll}
+%
+1a &
+$ (S_1 \sdor \{(r, A_1)\}) \sum (S_2 \sdor \{(r, A_2)\}) $ & rewrites to &
+$ (S_1 \sum S_2) \sor \{(r, A_1 \sor A_2)\} $ \\
+1b & $ S_1 \sum S_2 $ & rewrites to & $ S_1 \sor S_2 $ \\ 
+2a &
+$ (S_1 \sdor \{(r, A_1)\}) \dsum (S_2 \sdor \{(r, A_2)\}) $ & rewrites to &
+$ (S_1 \dsum S_2) \sor \{(r, A_1 \distr A_2)\} $ \\
+2b & $ S_1 \dsum S_2 $ & rewrites to & $ S_1 \sor S_2 $
+%
+\end{tabular} \end{center}
+\end{footnotesize}
+
+Rules 1a, 2a override 1b, 2b respectively and
+$ A_1 \distr A_2 = \{G_1 \sum G_2 \st G_1 \in A_1, G_2 \in A_2\} $.
+
+\item 
+The semantics of \TT{property} operator is described below.
+
+In the following rule,
+$s$ is ``$ \TT{property}\ h\ p_1\ \TT{main}\ p_2\ \RM{attr}\ e_1, \cdots,
+e_m\ \TT{in}\ k\ x $'', $P$ is $ \Property\ h $ and
+$A_2$ is $ \{ \Exp\ P\ p_1\ r_1\ \{e_1, \cdots, e_m\}\} $:
+
+\begin{footnotesize}
+\begin{center}
+%
+\irule
+{h \oft \GP{refine} \spc p_1, p_2 \oft \GP{path} \spc 
+ e_1, \cdots, e_m \oft \GP{exp} \spc k \in \TT{["pattern"]?} \spc 
+ (\G, x) \daq S
+}{A}
+{(\G, s) \daq \bigsum \{ \{(r_2, A_2)\} \st  
+(\lex r_1 \in \Src\ k\ P\ (\Fsts\ S))\ 
+(r_1, p_1 \app p_2, r_2) \in P
+\}}
+%
+\end{center}
+\end{footnotesize}
+
+When the \TT{main} clause is not present, we assume $ p_2 = \TT{/} $.
+
+Here $ \Property\ h $ gives the appropriate access relation according to
+the $ h $ flag (this is the primitive function that inspects the {\RDF} graph,
+see \subsecref{}). 
+
+$ \Src\ k\ P\ V $ is a helper function giving the source set
+according to the $ k $ flag. $ \Src $ is based on $ \Match $, the helper
+function handling POSIX regular expressions. Formally:
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{rll}
+%
+$ \Src\ \TT{""}\ P\ V $ & rewrites to & $ V $ \\
+$ \Src\ \TT{"pattern"}\ P\ V $ & rewrites to &
+$ \Match\ \{r_1 \st (\lex p, r2)\ (r_1, p, r_2) \in P\} $\ V \\
+$ \Match\ W\ V $ & rewrites to & $ \bigsor \{\Pattern\ W\ s \st s \in V\} $
+%
+\end{tabular} \end{center}
+\end{footnotesize}
+
+Here $ \Pattern\ W\ s $ is the primitive function returning the subset of
+$ W \oft \Setof\ \Str $ whose element match the POSIX 1003.2-1992%
+\footnote{Included in POSIX 1003.1-2001:
+\URI{http://www.unix-systems.org/version3/ieee\_\,std.html}.}
+regular expression $ \verb+"^"+ \app s \app \TT{"\$"} $.
+
+$ \Exp\ P\ \p_1\ r_1\ E $ is the helper function that builds the group of
+attributes specified in the \TT{attr} clause.
+$ \Exp $ is based on $ \Exp\p $ which handles a single attribute. Formally:
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{rlll}
+%
+$ f\ P\ r_1\ p_1\ p $ & rewrites to &
+$ \{ r_2 \st (r_1, p_1 \app (\Name\ p), r_2) \in P \} $ &
+with $ p \oft \GP{path} $ \\
+$ \Exp\p\ P\ r_1\ p_1\ p $ & rewrites to &
+$ \{ (\Name\ p, f\ P\ r_1\ p_1\ p) \} $ & 
+with $ p \oft \GP{path} $ \\
+$ \Exp\p\ P\ r_1\ p_1\ (p\ \TT{as}\ p\p) $ & rewrites to &
+$ \{ (\Name\ p\p, f\ P\ r_1\ p_1\ p) \} $ & 
+with $ p, p\p \oft \GP{path} $ \\
+$ \Exp\ P\ r_1\ p_1\ E $ & rewrites to &
+$ \bigsum \{ \Exp\p\ P\ r_1\ p_1\ e \st e \in E \} $ &
+with $ E \oft \Setof\ \GP{exp} $ 
+\end{tabular} \end{center}
+\end{footnotesize}
+
+When $ c_1, \cdots, c_n \oft \GP{cons} $ and the clause
+``\TT{istrue} $ c_1, \cdots, c_n $'' is present, the set $ P $ must be replaced
+with $ \{ (r_1, p, r_2) \in P \st \Istrue\ P\ r_1\ p_1\ C \} $
+where $ C $ is $ \{c_1, \cdots, c_n\} $ and $ \Istrue $ is a helper function
+that checks the constraints in $ C $.
+$ \Istrue $ is based on $ \Istrue\p $ that handles a single constraint.
+Formally, if $ p \oft \GP{path} $ and $ (\G, x) \daq S $:
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{rll}
+%
+$ g\ P\ p_1\ p $ & rewrites to &
+$ \{ r_2 \st (\lex r_1)\ (r_1, p_1 \app (\Name\ p), r_2) \in P \} $ \\
+$ \Istrue\p\ P\ r_1\ p_1\ (p\ \TT{in}\ x) $ & rewrites to &
+$ (f\ P\ r_1\ p_1\ p) \meet \Fsts\ S $ \\
+$ \Istrue\p\ P\ r_1\ p_1\ (p\ \TT{match}\ x) $ & rewrites to &
+$ (f\ P\ r_1\ p_1\ p) \meet \Match\ (g\ P\ p_1\ p)\ (\Fsts\ S) $ \\
+$ \Istrue\ P\ r_1\ p_1\ C $ & rewrites to &
+$ (\lall c \in W)\ \Istrue\p\ P\ r_1\ p_1\ c $
+%
+\end{tabular} \end{center}
+\end{footnotesize}
+
+For each clause ``\TT{isfalse} $ c_1, \cdots, c_n $'' the set $ P $
+must be replaced with
+$ \{ (r_1, p, r_2) \in P \st \lnot (\Istrue\ P\ r_1\ p_1\ C) \} $
+(using the above notation).
+Note that these substitutions and the former must be composed if necessary.
+
+If the \TT{inverse} flag is present, also replace the instances of $ P $ in
+the rule $A$ and in the definition of $ \Src $ with
+$ \{ (r_2, p, r_1) \st (r_1, p, r_2) \in P \} $.
+
+\end{itemize}
+
+The second group of \GP{query} expressions includes the context manipulation
+facilities:
+
+\begin{itemize}
+
+\item
+The operators for reading variables:
+
+\begin{footnotesize} \begin{center}
+%
+\irule{i \oft \GP{svar}}{}{(\g, i) \daq \get{\G_s}{i}} \spc
+\irule{i \oft \GP{avar}}{}{(\g, i) \daq \{\get{\G_a}{i}\}}
+%
+\end{center} \end{footnotesize}
+
+$ \get{\G_s}{i} $ and $ \{\get{\G_a}{i}\} $ mean $ \ES $ if $ i $ is not defined.
+
+\item 
+The \TT{let} operator assigns an {\av} set variable (svar):
+
+\begin{footnotesize} 
+\begin{center}
+%
+\irule{i \oft \GP{svar} \spc (\G_1, x_1) \daq (\g, S_1) \spc
+       ((\set{\G_s}{i}{S_1}, \G_a, \G_g), x_2) \daq (\G_2, S_2)}
+{}{(\G_1, \TT{let}\ i\ \TT{=}\ x_1\ \TT{in}\ x_2) \daq (\G_2, S_2)} 
+%
+\end{center} 
+\end{footnotesize}
+
+The sequential composition operator \TT{;;} has the semantics of a \TT{let}
+introducing a fresh variable, so ``$ x_1\ \TT{;;}\ x_2 $'' revrites
+to ``$ \TT{let}\ i\ \TT{=}\ x_1\ \TT{in}\ x_2 $'' where $i$ does not occur in
+$x_2$.
+
+\item 
+The \TT{ex} and ``dot'' operators provide a way to read the attributes stored
+in avar's.
+
+The \TT{ex} (exists) operator gives access to the groups of attributes
+associated to the {\av}'s in the $ \G_a $ part of the context and does
+this by loading its $ \G_g $ part, which is used by the ``dot'' operator
+described below. 
+
+\TT{ex} is true if the query following it is successful for at least one
+pool of attribute groups, one for each {\av} in the $ \G_a $ part of the
+context. Formally we have the rules:
+
+\begin{footnotesize}
+\begin{center}
+%
+\irule{(\lall \D_g \in \All\ \G_a)\ ((\G_s, \G_a, \G_g + \D_g), y) \daq \F}
+      {1}{(\G, \TT{ex}\ y) \daq \F} \spc
+\irule{\Nop}{2}{(\G, \TT{ex}\ y) \daq \T} \spc
+%
+\end{center}
+\begin{center}
+%
+\irule {i \oft \GP{avar} \spc p \oft \GP{path} \spc \get{\get{\G_g}{i}}{\Name\ p} = \{s_1, \cdots, s_m\}}{}
+       {(\G, i\TT{.}p) \daq \{(s_1, \ES), \cdots, (s_m, \ES)\}}
+%
+\end{center}
+\end{footnotesize}
+
+where%
+\footnote{$\D_g$ has the type of $ \G_g $.}
+$ \All\ \G_a = \{\D_g \st \get{\D_g}{i} = G\ \RM{iff}\ G \in \Snd\ \get{\G_a}{i} \} $,
+and $ \G = \g $.
+
+Moreover $ \get{\get{\G_g}{i}}{\Name\ p} $ means $ \ES $
+if $ i $ or $ \Name\ p $ are not defined where appropriate.
+
+Here the first rule has higher precedence than the second one does.
+
+\end{itemize}
+
+The third group of \GP{query} expressions includes the {\av} set manipulation
+facilities:
+
+\begin{itemize}
+
+\item
+The \TT{add} operator adds a given set of attribute groups to the {\av}'s
+of an {\av} set using a union with or without attribute distribution
+according to the \TT{distr} flag.
+
+\begin{footnotesize} 
+\begin{center}
+%
+\irule
+{h \in \TT{["distr"]?} \spc a \in \GP{xgroups} \spc 
+ (\G, \TT{[} ""\ \TT{attr}\ a \TT{]}) \daq \{("", A)\} \spc 
+ (\G, x) \daq \{(r_1, A_1), \cdots, (r_m, A_m)\}}{}
+{(\G, \TT{add}\ a\ \TT{in}\ x) \daq \{(r_1, A_1 \jolly A), \cdots, (r_m, A_m \jolly A)\}}
+%
+\end{center}
+\begin{center}
+%
+\irule
+{h \in \TT{["distr"]?} \spc i \in \GP{avar} \spc  
+ (\g, x) \daq \{(r_1, A_1), \cdots, (r_m, A_m)\}}{}
+{(\g, \TT{add}\ i\ \TT{in}\ x) \daq \{(r_1, A_1 \jolly \Snd\ \get{\G_a}{i}), \cdots, (r_m, A_m \jolly \Snd\ \get{\G_a}{i})\}}
+%
+\end{center}
+\end{footnotesize}
+
+Where $ \jolly_{\tt""} = \sor $ and $ \jolly_{\tt"distr"} = \distr $.
+Moreover $ \Snd\ \get{\G_a}{i} = \ES $ if $i$ is not defined.
+
+\item
+The semantics of the \TT{for} operator is given in terms of the {\For} helper
+function:
+
+\begin{footnotesize}
+\begin{center}
+%
+\irule{i \oft \GP{avar} \spc (\G, x_1) \daq (\G_1, S_1) \spc h \in \TT{["sup"|"inf"]}}
+{}{(\G, \TT{for}\ i\ \TT{in}\ x_1\ h\ x_2) \daq \For\ h\ \G_1\ i\ x_2\ S_1} \spc
+\irule{i \oft \GP{avar} \spc x_2 \oft \GP{query}}{}
+      {\For\ h\ \G\ i\ x_2\ \ES\ \RM{rewrites to}\ (\G, \ES)}
+%
+\end{center} 
+\begin{center}
+%
+\irule{i \oft \GP{avar} \spc ((\G_s, \set{\G_a}{i}{R}, \G_g), x_2) \daq (\G_2, S_2)}
+      {}{\For\ h\ \G\ i\ x_2\ (S_1 \sdor \{R\})\ \RM{rewrites to}\
+      (\G_2 ,(\Snd\ (\For\ h\ \G_2\ i\ x_2\ S_1)) \jolly_h S_2)} 
+%
+\end{center}
+\end{footnotesize}
+
+Here we have $ R \oft T_3 $, $ \G = \g $, $ \jolly_{\tt"sup"} = \sum $ and
+$ \jolly_{\tt"inf"} = \prod $.
+
+$ \dprod $ and $ \prod $ are helper functions describing the two intersection
+operations on {\av} sets: with and without attribute distribution respectively.
+They are dual to $ \dsum $ and $ \sum $. $ \dprod $ does not appear in this
+version of {\MathQL} but was used in the erlier versions 
+\cite{Lor02, GS03, Gui03}.   
+
+\begin{footnotesize}
+\begin{center} \begin{tabular}{lrll}
+%
+1a &
+$ (S_1 \sdor \{(r, A_1)\}) \prod (S_2 \sdor \{(r, A_2)\}) $ & rewrites to &
+$ (S_1 \prod S_2) \sor \{(r, A_1 \sor A_2)\} $ \\
+1b & $ S_1 \prod S_2 $ & rewrites to & $ \ES $ \\ 
+2a &
+$ (S_1 \sdor \{(r, A_1)\}) \dprod (S_2 \sdor \{(r, A_2)\}) $ & rewrites to &
+$ (S_1 \dprod S_2) \sor \{(r, A_1 \distr A_2)\} $ \\
+2b & $ S_1 \dprod S_2 $ & rewrites to & $ \ES $
+%
+\end{tabular} \end{center}
+\end{footnotesize}
+
+As for $ \sum $ and $ \dsum $, rules 1a, 2a override rules 1b, 2b respectively.
+
+\item
+The semantics of the \TT{while} operator is given by the rules below:
+
+\begin{footnotesize}
+\begin{center}
+%
+\irule{h \in \TT{["sup"|"inf"]} \spc (\G, x_1) \daq (\G_1, \ES)}{1}
+{(\G, \TT{while}\ x_1\ h\ x_2) \daq (\G_1, \ES)}
+%
+\end{center} 
+\begin{center}
+%
+\irule
+{h \in \TT{["sup"|"inf"]} \spc (\G, x_1) \daq (\G_1, S_1) \spc
+ (\G_1, x_2) \daq (\G_2, S_2) \spc 
+ (\G_2, \TT{while}\ x_1\ h\ x_2) \daq (\G_3, S)}{2}
+{(\G, \TT{while}\ x_1\ h\ x_2) \daq (\G_3, S_2 \jolly_h S)}
+%
+\end{center} 
+\end{footnotesize}
+
+Again $ \jolly_{\tt"sup"} = \sum $ and $ \jolly_{\tt"inf"} = \prod $.
+Moreover rule 1 takes precedence over rule 2.
+
+\end{itemize}
+
+The forth group of \GP{query} constructions make {\MathQL} an extensible
+language.
+
+\begin{itemize}
+
+\item
+The ``function'' construction invokes an external function returning an {\av}
+set. The function is identified by a \GP{path} and its arguments are a set of 
+\GP{path}'s and a set of \GP{query}'s. It is a mistake to invoke a function
+with the wrong number of \GP{path}'s and \GP{query}'s as input (each
+particular function defines these numbers independently).
+
+\begin{footnotesize}
+\begin{center}
+%
+\irule
+{p, p_1, \cdots, p_m \in \GP{path} \spc x_1, \cdots, x_n \in \GP{query}}{}
+{(\G, p\ \{p_1 \TT{,} \cdots \TT{,} p_m\}\ \{x_1 \TT{,} \cdots \TT{,} x_m\})
+ \daq \Fun\ p\ [p_1, \cdots, p_m]\ [x_1, \cdots, x_n]\ \G} 
+%
+\end{center} 
+\end{footnotesize}
+
+where $ \Fun \oft \GP{path} \times (\Listof\ \GP{path}) \times (\Listof\
+\GP{query}) \times K \to T_4 $ is the primitive function performing the
+low level invocation. 
+The core language does not include any external function and it is a mistake
+to invoke an undefined function.
+
+\item
+The \TT{gen} construction invokes an external function returning a \GP{query}
+The function is identified by a \GP{path} and its arguments are a set of 
+\GP{query}'s. It is a mistake to invoke a function with the wrong number of
+\GP{query}'s as input (each particular function defines this number
+independently).
+
+\begin{footnotesize}
+\begin{center}
+%
+\irule
+{p \in \GP{path} \spc x_1, \cdots, x_n \in \GP{query} \spc 
+ (\G, \Gen\ p\ [x_1, \cdots, x_n]\ \G) \daq (\G\p, S)}{}
+{(\G, \TT{gen}\ p\ \{x_1 \TT{,} \cdots \TT{,} x_m\}) \daq (\G\p, S)} 
+%
+\end{center} 
+\end{footnotesize}
+
+where $ \Gen \oft \GP{path} \times (\Listof\ \GP{query}) \times K \to
+\GP{query} $ is the primitive function performing the low level invocation. 
+The core language does not include any external function of this kind and it
+is a mistake to invoke an undefined function.
+
+The construction ``\TT{gen} p \TT{in} x'' rewrites to ``\TT{gen} p \{x\}''
+for the user's convenience.
+
+\end{itemize}
+
+\subsubsection {Results}
+
+An \GP{avs} expression ({\ie} the explicit representation of an {\av} set that
+can denote a query result) is evaluated to an {\av} set according to the
+following rules.
+
+\begin{footnotesize}
+\begin{center} 
+%
+\irule{x_1, \cdots, x_m \in \GP{av} \spc 
+       x_1 \dar S_1 \spc \cdots \spc x_m \dar S_m}{}
+      {x_1 \TT{;} \cdots \TT{;} x_m \dar S_1 \sum \cdots \sum S_m}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{q \in \GP{string} \spc g_1, \cdots, g_m \in \GP{group} \spc
+       q\ \TT{attr}\ g_1 \dar S_1 \spc \cdots \spc
+       q\ \TT{attr}\ g_m \dar S_m}{}
+      {q\ \TT{attr}\ g_1 \TT{,} \cdots \TT{,} g_m \dar S_1 \sum \cdots \sum S_m}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{q \in \GP{string} \spc a_1, \cdots, a_m \in \GP{atr} \spc
+       q\ \TT{attr}\ \{ a_1 \} \dar S_1 \spc \cdots \spc
+       q\ \TT{attr}\ \{ a_m \} \dar S_m}{}
+      {(\G, q\ \TT{attr}\ \{ a_1 \TT{;} \cdots \TT{;} a_m \} \dar S_1 \dsum \cdots \dsum S_m}
+%
+\end{center} 
+\begin{center} 
+%
+\irule{q, q_1, \cdots, q_m \in \GP{string} \spc p \in \GP{path}}{}
+      {q\ \TT{attr}\ \{ p = \{ q_1 \TT{,} \cdots \TT{,} q_m \} \} \dar 
+       \{(\Unquote\ q, \{ \{ (\Name\ p, \{ \Unquote\ q_1, \cdots, \Unquote\ q_m \}) \} \})\}}
+%
+\end{center} 
+\end{footnotesize}
+
+
+\subsection {The basic library}
+