From 468da7af4b52d01451073ff1cca5aa1949b9657f Mon Sep 17 00:00:00 2001 From: Ferruccio Guidi Date: Sun, 28 Mar 2004 11:06:45 +0000 Subject: [PATCH] updating and structuring --- helm/mathql/doc/mathql.tex | 2 +- helm/mathql/doc/mathql_bib.tex | 14 +- helm/mathql/doc/mathql_introduction.tex | 176 +---- .../mathql/doc/mathql_introduction_avsets.tex | 301 ++++++++ helm/mathql/doc/mathql_introduction_core.tex | 67 ++ .../doc/mathql_introduction_property.tex | 203 +++++ .../doc/mathql_introduction_textual.tex | 173 +++++ helm/mathql/doc/mathql_macros.sty | 16 +- helm/mathql/doc/mathql_operational.tex | 705 +----------------- .../doc/mathql_operational_background.tex | 149 ++++ helm/mathql/doc/mathql_operational_core.tex | 549 ++++++++++++++ .../mathql/doc/mathql_operational_library.tex | 1 + helm/mathql/doc/mathql_overview.tex | 12 +- 13 files changed, 1474 insertions(+), 894 deletions(-) create mode 100644 helm/mathql/doc/mathql_introduction_avsets.tex create mode 100644 helm/mathql/doc/mathql_introduction_core.tex create mode 100644 helm/mathql/doc/mathql_introduction_property.tex create mode 100644 helm/mathql/doc/mathql_introduction_textual.tex create mode 100644 helm/mathql/doc/mathql_operational_background.tex create mode 100644 helm/mathql/doc/mathql_operational_core.tex create mode 100644 helm/mathql/doc/mathql_operational_library.tex diff --git a/helm/mathql/doc/mathql.tex b/helm/mathql/doc/mathql.tex index 62d395443..1dde6905f 100644 --- a/helm/mathql/doc/mathql.tex +++ b/helm/mathql/doc/mathql.tex @@ -8,7 +8,7 @@ \addtolength{\textwidth}{2.0cm} \addtolength{\topmargin}{-1.0cm} -\title{MathQL-1.4} +\title{MathQL-1 Version 4\\Reference Documentation} \author{Ferruccio Guidi} \bibliographystyle{numeric} diff --git a/helm/mathql/doc/mathql_bib.tex b/helm/mathql/doc/mathql_bib.tex index 374048459..32b71ed40 100644 --- a/helm/mathql/doc/mathql_bib.tex +++ b/helm/mathql/doc/mathql_bib.tex @@ -43,12 +43,12 @@ Advisor: A. Asperti. \bibitem {RDF} \emph{Resource Description Framework (RDF) Model and Syntax Specification}. W3C Recommendation. February 22, 1999. \\ -\URI{http://www.w3.org/TR/1999/REC-rdfsyntax-19990222/}. +\CURI{http://www.w3.org/TR/1999/REC-rdfsyntax-19990222/}. \bibitem {RDFS} \emph{RDF Vocabulary Description Language 1.0: RDF Schema}. W3C Working Draft. January 23, 2003 -\URI{http://www.w3.org/TR/rdf-schema/}. +\CURI{http://www.w3.org/TR/rdf-schema/}. \bibitem {Sam00} G. Sambin: \emph{Formal topology and domains}. @@ -62,17 +62,17 @@ Ph.D. dissertation. University of Bologna, 2002. Advisor: A. Asperti. \bibitem {Unicode} Unicode Consortium: \emph{The Unicode Standard, Version 3.2}. March 2002. \\ -\URI{http://www.unicode.org/unicode/standard/standard.html}. +\CURI{http://www.unicode.org/unicode/standard/standard.html}. \bibitem {URI} \emph{Uniform Resource Identifiers (URI): Generic Syntax (RFC 2396)}. August 1998. \\ -\URI{http://www.ietf.org/rfc/rfc2396.txt}. +\CURI{http://www.ietf.org/rfc/rfc2396.txt}. \bibitem {W3Ca} \emph{Character Model for the World Wide Web 1.0}, W3C Working Draft. April 30, 2002. -\URI{http://www.w3.org/TR/charmod/}. +\CURI{http://www.w3.org/TR/charmod/}. \bibitem {Win93} G. Winskel: @@ -81,10 +81,10 @@ MIT Press Series in the Foundations of Computing. London: MIT Press, 1993. \bibitem {XML} \emph{Extensible Markup Language (XML) 1.0 (Second Edition)}. -W3C Recommendation. October 6, 2000. \URI{http://www.w3.org/REC-xml}. +W3C Recommendation. October 6, 2000. \CURI{http://www.w3.org/REC-xml}. \bibitem {XQuery} \emph{XQuery 1.0: An XML Query Language}. -W3C Working Draft November 15, 2002. \URI{http://www.w3.org/TR/xquery/}. +W3C Working Draft November 15, 2002. \CURI{http://www.w3.org/TR/xquery/}. \end{thebibliography} diff --git a/helm/mathql/doc/mathql_introduction.tex b/helm/mathql/doc/mathql_introduction.tex index a989a5ece..07894cb18 100644 --- a/helm/mathql/doc/mathql_introduction.tex +++ b/helm/mathql/doc/mathql_introduction.tex @@ -13,180 +13,14 @@ functions and a more uniform textual syntax. {\MathQL}.4 incorporates the features of {\MathQL}.3 not documented on paper% \footnote {See the ``what's new'' section of {\MathQL} Web Site: -\URI{helm.cs.unibo.it/mathql}.} +\CURI{helm.cs.unibo.it/mathql}.} and adds some new features improving {\MathQL} capabilities to post-process the query results. -\subsection{Textual syntax} +\input{mathql_introduction_avsets} +\input{mathql_introduction_property} +\input{mathql_introduction_core} +\input{mathql_introduction_textual} -The syntax of grammatical productions resembles BNF and POSIX notation: -\begin{itemize} -\item -\TT{::=} defines a grammatical production by means of a regular expression. - -Regular expressions are made of the following elements -(here \TT{...} is a placeholder): - -% \item -% \TT{.} represents any character between U 0020 and U 007F inclusive; - -\item -\TT{`...`} represents any character in a character set; - -\item -\verb+`^ ...`+ represents any character (U+0020 to U+007E) not in a character -set; - -\item -\TT{"..."} represents a string to be matched verbatim; - -\item -\GP{...} represents a regular expression defined by a grammatical production; - -\item -\TT{... ...} represents a conjunctive regular expression; - -\item -\TT{... | ...} represents a disjunctive regular expression; - -\item -\TT{[ ... ]?} represents an optional regular expression; - -\item -\TT{[ ... ]+} represents a regular expression to be repeated one or more times; - -\item -\TT{[ ... ]*} represents a regular expression to be repeated zero or more times; - -\item -\TT{[ ... ]} represents a grouped regular expression. - -\end{itemize} - -\noindent -{\MathQL} Expressions can contain quoted constant strings with the syntax of -\figref {StrTS}.% -\footnote{Note that the first slash of the \GP{path} is not optional as -in {\MathQL}.3.} - -\begin{figure}[ht] -\begin{footnotesize} \begin{verbatim} - ::= '0 - 9' - ::= [ ]* - ::= | 'A - F' | 'a - f' - ::= "u" | '"' | "\" | "^" - ::= '"' [ "\" "^" | '^ "\^' ]* '"' - ::= "/" | [ "/" ]+ -\end{verbatim} \end{footnotesize} -\vskip-1pc -\caption{Textual syntax of numbers, strings and paths} \label{StrTS} -\end{figure} - -\noindent -The meaning of the escaped sequences is shown in \figref{EscTS} -(where $ .... $ is a 4-digit placeholder). - -\begin{figure}[ht] -\begin{footnotesize} -\begin{center} \begin{tabular}{|l|l|c|} -\hline {\bf Escape sequence} & {\bf Unicode character} & {\bf Text} \\ -\hline \verb+\u....^+ & U+.... & \\ -\hline \verb+\"^+ & U+0022 & \verb+"+ \\ -\hline \verb+\\^+ & U+005C & \verb+\+ \\ -\hline \verb+\^^+ & U+005E & \verb+^+ \\ -\hline -\end{tabular} \end{center} -\end{footnotesize} -\vskip-1pc -\caption{Textual syntax of escaped characters} \label{EscTS} -\end{figure} - -{\MathQL} character escaping syntax aims at complying with W3C character model -for the World Wide Web \cite{W3Ca} which recommends a support for standard -Unicode characters (U+0000 to U+FFFF) and escape sequences with -start/end delimiters. -In particular {\MathQL} escape delimiters (backslash and caret) are chosen -among the {\em unwise} characters for URI references (see \cite{URI}) because -URI references are the natural content of constant strings and these -characters should not be so frequent in them. - -Query expressions can contain variables for {\av}'s (production \GP{avar}) -and variables for {\av} sets, {\ie} for query results (production \GP{svar}) -according to the syntax of \figref{VarTS}. - -\begin{figure}[ht] -\begin{footnotesize} \begin{verbatim} - ::= [ 'A - Z' | 'a - z' | `_` ]+ - ::= [ | ]* - ::= "@" - ::= "$" -\end{verbatim}\end{footnotesize} %$ -\vskip-1pc -\caption{Textual syntax of variables} \label{VarTS} -\end{figure} - -\noindent -The syntax of query expressions (production \GP{query}) is described in -\figref{QueryTS}. - -\begin{figure}[ht] -\begin{footnotesize} \begin{verbatim} - ::= [ "inverse" ]? [ "sub" | "super" ]? -
::= [ "main" ]? - ::= [ "in" | "match" ] - ::= [ "istrue" [ "," ]* ]? - ::= [ "isfalse" [ "," ]* ]* - ::= [ "as" ]? - ::= [ "attr" [ "," ]* ]? - ::=
- ::= [ "pattern" ]? - ::= [ [ "," ]* ]? - ::= "(" ")" | | "[" "]" - | "property" "of" - | "let" "=" "in" - | ";;" | | - | "ex" | "." - | "add" [ "distr" ]? [ | ] "in" - | "for" "in" [ "sup" | "inf" ] - | "while" [ "sup" | "inf" ] - | "{" "}" "{" "}" - | "gen" [ "{" "}" | "in" ] - ::= [ [ "," ]* ]? - ::= "=" - ::= "{" [ ";" ]* "}" - ::= [ "," ]* - ::= [ "attr" ]? - ::= [ [ ";" ]* ]? -\end{verbatim} \end{footnotesize} -\vskip-1pc -\caption{Textual syntax of queries} \label{QueryTS} -\end{figure} - -\noindent -The syntax of result expressions (production \GP{avs}) is described in -\figref{ResultTS}. - -\begin{figure}[ht] -\begin{footnotesize} \begin{verbatim} - ::= "=" "{" [ [ "," ]* ]? "}" - ::= "{" [ ";" ]* "}" - ::= [ "attr" [ "," ]* ]? - ::= [ [ ";" ]* ]? -\end{verbatim} \end{footnotesize} -\vskip-1pc -\caption{Textual syntax of results} \label{ResultTS} -\end{figure} - -\xcomment { - -\begin{figure}[ht] -\begin{footnotesize} \begin{verbatim} - | "select" "from" "where" -\end{verbatim} \end{footnotesize} -\vskip-1pc -\caption{Textual syntax of basic query extensions} \label{BasicTS} -\end{figure} - -} diff --git a/helm/mathql/doc/mathql_introduction_avsets.tex b/helm/mathql/doc/mathql_introduction_avsets.tex new file mode 100644 index 000000000..f90ea9247 --- /dev/null +++ b/helm/mathql/doc/mathql_introduction_avsets.tex @@ -0,0 +1,301 @@ +\subsection {Sets of attributed values.} + +The data representation model used by {\MathQL} relies on the notion of +\emph{set of attributed values} ({\av} set for short) that is, in practice, +the only data type available in {\MathQL}.4. In this sense {\MathQL}.4 is a +statically untyped language.% +\footnote +{A type system that fits {\MathQL} as an {\RDF}-oriented query language, +should be driven from the {\RDFS} class system. This may be a future +improvement.} +Each {\av} in an {\av} set consists of a string% +\footnote{When we say \emph{string}, we mean a finite sequence of characters.} +(that we call the \emph{head string} or \emph{value}) and a (possibly emty) +multiset of named attributes whose content is a set of strings. +Attribute names are made of a (possibly empty) list of string components, so +they can be hierarchically structured. +Moreover the attributes of a value are partitioned into a set of \emph{groups} +({\ie} subsets) to improve its structure. + +In the above description a \emph{set} is an \emph{unordered} finite +sequence \emph{without} repetitions wheras a \emph{multiset} is an +\emph{unordered} finite sequence \emph{with} repetitions. + +In the present context repetitions are defined as follows: +two {\av}'s are repeated if they share the same head string without any +condition on their attributes, two groups are repeated of they contain the +same attributes (equal both in name and content), two attributes of a group +are repeated if they share the same name without any condition on their +content, and two strings are always compared in a case-sensitive manner.% +\footnote +{The Author's experience with {\MathQL} seems to show that the above +definition of an {\av} set is just the right one among the many alternatives +that were tried.} + +As we said, {\MathQL}.4 uses {\av} sets to represent many kinds of +information, namely: + +\begin{enumerate} + +\item +A pool of {\RDF} triples having a common subject $r$, which in general is a +{\URI} reference \cite{URI}% +\footnote +{A {\URI} \emph {reference} is a {\URI} with an optional fragment identifier.}, +is encoded in a single {\av} placing $r$ in the head string. +The predicates of the triples are encoded as attribute names and their objects +are placed in the attributes' contents. +These contents are structured as multiple strings with the aim of holding the +objects of repeated predicates. +Moreover structured attribute names can encode various components of +structured properties preserving their semantics. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} +The RDF triples: +("http://www.w3.org/2002/01/rdf-databases/protocol", "dc:creator", "Sandro Hawke") +("http://www.w3.org/2002/01/rdf-databases/protocol", "dc:creator", "Eric Prud'hommeaux") +("http://www.w3.org/2002/01/rdf-databases/protocol", "dc:date", "2002-01-08") + +The corresponding attributed value: +"http://www.w3.org/2002/01/rdf-databases/protocol" attr + {"dc:creator" = {"Sandro Hawke", "Eric Prud'hommeaux"}; "dc:date" = "2002-01-08"} +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{The representation of a pool of {\RDF} triples} \label{AVOne} +\end{figure} + +\figref{AVOne} shows how a set of triples can be coded in an {\av}. +Note that the word \emph{attr} separates the head string from its attributes, +braces enclose an attribute group in which attributes are separated by +semicolons, and an equal sign separates an attribute name from its contents +(see \subsecref{Textual} for the complete {\av} syntax). + +In this setting the grouping feature can be used to separate semantically +different classes of properties associated to a resource (as for instance +Dublin Core metadata, Euler metadata and user-defined metadata). + +\item +A pool of arbitrarily chosen {\RDF} triples is encoded in an {\av} set +placing different {\av}'s the subset of triples sharing the same subject. + +Note that the use of {\av} sets to build query results allows {\MathQL} queries +to return sets of {\RDF} triples instead of mere sets of resources, in the +spirit of what is currently done by other {\RDF}-oriented query languages. + +If the {\av}'s of an {\av} set share the same attribute names and grouping +structure, this set can be represented as a table in which each row encodes +an {\av} and each column is associated to an attribute (except the first one +which holds the head strings). +\figref{Table} shows an {\av} set describing the properties of two resources +``A'' and ``B'' giving its table representation, in which the columns +corresponding to attributes in the same group are clustered between +double-line delimiters% +\footnote{A table with grouped labelled columns like the one above resembles a +set of relational database tables.}. + +%Another possible use of a {\MathQL} query result is for the encoding of a +%relational database table: in this sense the indexed column is stored in the +%subject strings, the names of the other columns are stored in attribute names +%and cell contents are stored in attribute values. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} +"A" attr {"major" = "1"; "minor" = "2"}, {"first" = "2002-01-01"; "modified" = "2002-03-01"}; +"B" attr {"major" = "1"; "minor" = "7"}, {"first" = "2002-02-01"; "modified" = "2002-04-01"} +\end{verbatim} +\begin{center} \begin{tabular}{|c||c|c||c|c||} +\hline & {\bf ``major''} & {\bf ``minor''} & {\bf ``first''} & {\bf ``modified''} \\ +\hline ``A'' & ``1'' & ``2'' & ``2002-01-01'' & ``2002-03-01'' \\ +\hline ``B'' & ``1'' & ``7'' & ``2002-02-01'' & ``2002-04-01'' \\ +\hline +\end{tabular} \end{center} \end{footnotesize} +\caption{A set of attributed values displayed as a table} \label{Table} +\end{figure} + +The above example gives a spatial idea of the geometry of an {\av} set ({\ie} +a query result) which fits in 4 dimensions: namely we can extend independently +the set of the head strings (dimension 1), the attributes in each group +(dimension 2), the groups in each {\av} (dimension 3) and the contents of each +attribute (dimension 4). + +The metadata defined in the table of \figref{Table} will be used in subsequent +examples. +For this purpose assume that \TT{first} and \TT{modified} are the components +of a structured property \TT{date} available for the resources ``A'' and ``B''. + +\item +The value of an {\RDF} property is encoded in a single {\av} distinguishing +three situations: + +\begin{itemize} + +\item +If the property is unstructured, its value is placed in the {\av} head +string and no attributes are defined. + +\item +If the property is structured and its value has a main component% +\footnote{Which is set by the \emph{rdf:value} property or defined by a +specific application.}, +the content of this component is placed in the {\av} head string and the +other components are stored in the {\av} attributes as in the case 1. + +\item +If the property is structured and its value does not have a main component, +the {\av} head string is empty and the components are stored in the +attributes. + +\end{itemize} + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} +First example, one instance: +"" attr {"major" = "1"; "minor" = "2"}; no main component +"1" attr {"minor" = "2"}; main component is "major" +"2" attr {"major" = "1"} main component is "minor" + +Second example: two separate instances: +"" attr {"major" = "1"; "minor" = "2"}, {"major" = "1"; "minor" = "7"}; no main component +"1" attr {"minor" = "2"}, {"minor" = "7"} main component is "major" + +Third example: two mixed instances: +"" attr {"major" = "3", "6"; "minor" = "4", "9"} no main component +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{The representation of the structured value of a property} +\label{AVTwo} +\end{figure} + +\figref{AVTwo} (first example) shows three possible ways of representing in +{\av}'s an instance of a structured property \TT{id} whose value has two +fields ({\ie} properties) \TT{major} and \TT{minor}. +In this instance, \TT{major} is set to ``1'' and \TT{minor} is set to ``2''. +The representations depend on which component of \TT{id} is chosen as the +main component (none, \TT{major} or \TT{minor} respectively). +Several structured property values sharing a common main component can be +encodes in a single {\av} exploiting the grouping facility: in this case the +attributes of every instance are enclosed in separate groups. +\figref{AVTwo} (second example) shows the representations of two instances of +\TT{id}: the previous one and a new one for which \TT{major} is ``1'' and +\TT{minor} is ``7''. + +Note that if the attributes of the two groups are encoded in a single group, +the notion of which components belong to the same property value can not be +recovered in the general case because the values of an attribute form a set +and thus are unordered. \newline +As an example think of two instances of \TT{id} encoded as in \figref{AVTwo} +(third example). + +\item +A natural number is stored, using its decimal representation, in the head +string of a single {\av} with no attributes. + +\item +The boolean value \emph{false} is stored as an empty {\av} set, whereas +a non-empty {\av} set may be interpreted as the boolean value \emph{true}. +The default representation of \emph{true} is a single {\av} with an empty +head string and no attributes. + +\end{enumerate} + +{\MathQL} defines five binary operations on {\av} sets: two unions, two +intersections and a difference. The first four are defined in terms of an +operation, that we call \emph{addition}, involving two {\av}'s with the same +head string. +The result is an {\av} with the same head string of the operands but there are +two ways to compose the attribute groups: + +\begin{itemize} + +\item +With the \emph{set-theoretic} addition, the set of attribute groups in the +resulting {\av} is the set-theoretic union of the sets of attribute groups in +the operands. + +\item +With the \emph{distributive} addition, the set of attribute groups in the +resulting {\av} is the ``Cartesian product'' of the sets of attribute groups +in the two operands. +In this context, an element of the ``Cartesian product'' is not a pair of +groups but it is the set-theoretic union of these groups where the contents of +homonymous attributes are clustered together using set-theoretic unions. + +\end{itemize} + +\figref{Addition} shows an example of the two kinds of addition. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} +Attributed values used as operands for the addition: +"1" attr {"A" = "a"}, {"B" = "b1"} +"1" attr {"A" = "a"}, {"B" = "b2"} + +Set-theoretic addition: +"1" attr {"A" = "a"}, {"B" = "b1"}, {"B" = "b2"} + +Distributive addition: +"1" attr {"A" = "a"}, {"A" = "a"; "B" = "b2"}, {"B" = "b1"; "A" = "a"}, {"B" = {"b1", "b2"}} +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{The addition of attributed values} +\label{Addition} +\end{figure} + +Now we can discuss the five operations between {\av} sets that we mentioned +above: + +\begin{itemize} + +\item +The two unions ocorresponds to the set-theoretic union of their operand where +the {\av}'s sharing the head string are are added either set-theoretically or +distributively as explained above (thus we have a set-theoretic union and a +distributive union in the two cases). In this context the empty {\av} set +plays the role of the neutral element. +These operations play a central role {\MathQL} architecture and allow to +compose the attributes of the operands preserving their group structure. + +\item +The two intersections are the dual of the above unions: they contain the +{\av}'s whose head string appears in each argument where {\av}'s sharing the +head string are added either set-theoretically or distributively as before. + +The distributive intersection has the double benefit of filtering the +common values of the given {\av} sets, and of merging their attribute groups +in every possible way. This feature enables the possibility of performing +additional filtering operations checking the content of the merged groups. + +\item +The difference of two {\av} sets contains the {\av}'s of the first +argument whose head string does not appear in the second argument. + +\end{itemize} + +\figref{Binary} shows how the above operations work in a simple example. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} +Sets of attributed values used as operands for the operations: +"1" attr {"A" = "a"}; "2" attr {"B" = "b1"} +"2" attr {"B" = "b2"} + +Set-theoretic union: +"1" attr {"A" = "a"}; "2" attr {"B" = "b1"}, {"B" = "b2"} + +Distributive union: +"1" attr {"A" = "a"}; "2" attr {"B" = {"b1", "b2"}} + +Set-theoretic intersection: +"2" attr {"B" = "b1"}, {"B" = "b2"} + +Distributive intersection: +"2" attr {"B" = {"b1", "b2"}} + +Difference: +"1" attr {"A" = "a"} +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{The binary operations on sets of attributed values} +\label{Binary} +\end{figure} diff --git a/helm/mathql/doc/mathql_introduction_core.tex b/helm/mathql/doc/mathql_introduction_core.tex new file mode 100644 index 000000000..fbb2d95f5 --- /dev/null +++ b/helm/mathql/doc/mathql_introduction_core.tex @@ -0,0 +1,67 @@ +\subsection{The core language} + +{\MathQL}.4 consists of a core language and of a basic library. Other +user-defined libraries can be added at will. The core language includes the +\TT{property} operator mentioned in \subsecref{HighAccess} that queries the +underlying {\RDF} database and the infrastructure to post-process the +query results. The components of this infrastructure are listed below: + +\begin{itemize} + +\item +Explicit sets of attributed values. + +An explicit {\av} set can be placed in a query in two forms: +as a single quoted string, like \verb+"this is a query result"+, that +evaluates in a single {\av} with that value and no attributes, or as a full +{\av} set in the syntax shown in the previous sections but sorrounded by +square brackets, like \verb+["head" attr {"attribute-name" = "contents"}]+. +\newline +In the second form, the contents of an attribute can be the result of a query, +like +\verb+["head" attr {"attribute-name" = property /"metadata" of "resource"}]+. +\newline +In this case the contents of the attribute are the head strings of the query +result, whose attributes (if any) are discarded. + +\item +Variable assignment. + +Variables for {\av} sets (called \emph{set variables}) can be assigned using +a standard \emph{let-in} construction and may appear wherever an {\av} set +({\ie} a query result) is allowed. +\newline +The assignment has the form: +\TT{let \$}\EM{variable} \TT{=} \EM{av-set} \TT{in} \EM{av-set} +so we can write: +\newline +\verb+let $var = "contents" in ["head" attr {"attribute-name" = $var}]+. + +The scope rules of {\MathQL} variables are tipical for an imperative +programming language and any case of assignment propagation will be indicated. + +\item +Sequential composition. + +This construction has the form: \EM{av-set} \TT{;;} \EM{av-set} and works as +follows: the two {\av} sets are evaluated one after the other and the first +one is discarded but the variables assigned in the first {\av} set are +available to the second one. + +\item +Bounded iteration. + + +\end{itemize} + +\xcomment { + +Attributed values can be used to store any auxiliary information needed during +query execution. +In particular, {\MathQL} provides variables for {\av}'s which, in its textual +syntax, are identifiers% +\footnote{To be understood as in programming languages.} +preceded by the @ sign, as in \TT{@variable}, and that are introduced by the +\TT{for} and \TT{select} constructions to be explained below. + +} % \xcomment diff --git a/helm/mathql/doc/mathql_introduction_property.tex b/helm/mathql/doc/mathql_introduction_property.tex new file mode 100644 index 000000000..f73f5bc78 --- /dev/null +++ b/helm/mathql/doc/mathql_introduction_property.tex @@ -0,0 +1,203 @@ +\subsection{High level access to metadata} \label{HighAccess} + +{\MathQL} high level access to an {\RDF} database is \emph{graph-oriented} and +is delegated to its \TT{property} operator, that formally accesses an {\RDF} +graph% +\footnote +{When we say {\RDF} graph, we actually mean both the {\RDFM} graph and the +{\RDFS} graph.} +through an \emph{access relation} which is better understood by explaining +the informal semantics of the operator itself. + +This operator builds a \emph{result} {\av} set starting from two mandatory +arguments: the \emph{source} {\av} set and the \emph{head path}. +Other optional arguments may be used to change its default behaviour or to +request advanced functionalities. +Its textual syntax is (see \subsecref{Textual}): + +\begin{center} +\TT{property} \EM{optional-flags} \EM{head-path} \EM{optional-clauses} \TT{of} +\EM{optional-flag} \EM{av-set} +\end{center} + +A path has the structure of an attribute name ({\ie} a list of strings) and +denotes a (possibly empty) finite sequence of contiguous arcs (describing +properties in the {\RDF} graph). + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} +These examples refer to the resources "A" and "B" of Figure 2. + +Example 1: reading an unstructured property - simple case: +property "id"/"major" of {"A", "B"} returns "1" +property "id"/"minor" of {"A", "B"} returns "2"; "7" + +Example 2: reading an unstructured property - use of pattern: +property "id"/"minor" of pattern ".*" returns "2"; "7" + +Example 3: reading a structured property without main component: +property "id" attr "major", "minor" of {"A", "B"} +generates the following attributed values: +"" attr {"major" = "1"; "minor" = "2"}; "" attr {"major" = "1"; "minor" = "7"} +that are composed using MathQL-1 set-theoretic union giving the one-element set: +"" attr {"major" = "1"; "minor" = "2"}, {"major" = "1"; "minor" = "7"} + +Example 4: reading a structured property specifying a main component: +property "id" main "major" attr "minor" of {"A", "B"} gives +"1" attr {"minor" = "2"}, {"minor" = "7"} + +Example 5: the renaming mechanism: +property "id" attr "minor" as "new-name" of {"A", "B"} gives +"" attr {"new-name" = "2"}, {"new-name" = "7"} + +Example 6: imposing constraints on property values: +property "date" istrue "first" in "2002-01-01" attr "modified" of {"A", "B"} and +property "date" istrue "first" match ".*01.*" attr "modified" of {"A", "B"} give +"" attr {"modified" = "2002-03-01"} +Only the instance of "date" with "first" set to "2002-01-01" is considered. + +Example 7: inverse traversal of the head path: +property inverse "date" attr "first" in subj "" gives +"A" attr {"first" = "2002-01-01"}; "B" attr {"first" = "2002-02-01"} + +Example 8: some triples of an access relation: +The triples formalizing the property "date" of the resource "A": +("A", "date", ""); +("A", "date"/"first", "2002-01-01"); ("A", "date"/"modified", "2002-03-01") +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{The ``property'' operator} +\label{Property} +\end{figure} + +In the simplest case \TT{property} is used to read the values of a (possibly +compound) property with an unstructured value and does the following: + +\begin{enumerate} + +\item +It computes the instances of the given path in the {\RDF} graph available to +the query engine, using the resources specified in the head strings of the +source {\av} set (call them source resources) as start-nodes. + +\item +The computation gives a set of nodes in the {\RDF} graph ({\ie} the end-nodes +of the instantiated paths) which are the values of the instances of the +(possibly compound) property specified by the path and concerning the source +resources. + +\item +These values, encoded into {\av}'s as explained above, are composed by means +of the {\MathQL} set-theoretic union to form the result {\av} set. + +\end{enumerate} + +\figref{Property} (example 1) shows an instance of this procedure. +Note that the result sets of this example have no attributes and that a path +is represented by a slash-separated list of strings denoting the path's arcs.% +\footnote{If needed, the empty path is represented by a single slash.} + +Using the \TT{pattern} flag, \TT{property} can be instructed to regard the +values of the source {\av} set as POSIX regular expressions rather than as +constant strings. +In this case \TT{pattern} selects the set of resources matching at least one +of the given expressions. +See for instance \figref{Property} (example 2). + +If we want to read the value of a structured property we can specify the +value's main component in the \TT{main} \EM{optional-clause} (this +specification overrides the default setting inferred from the {\RDF} graph +through the \emph{rdf:value} property) and the list of the value's secondary +components in the \TT{attr} \EM{optional-clause}. +Note that if a secondary component is not listed in the \TT{attr} clause, it +will not be read. +Also recall that, when the result {\av}'s are formed, the main component is +is read in the head string, whereas the secondary components are +encoded using the attributes of a single group. +See for instance \figref{Property} (examples 3 and 4). +As a component of a property's value may be a structured property, its +specification (appearing in the \TT{main} or \TT{attr} clause) is +actually a path in the {\RDF} graph starting from the end-node of the head +path. + +Note that the name of an attribute, which by default is its defining path in +the \TT{attr} clause, can be changed with an optional \TT{as} clause for the +user's convenience. See for instance \figref{Property} (example 5). +The alternative could be a simple string but it needs to be a path for typing +reasons. In any case a string can be seen as a one-element path. + +In the default case \TT{property} builds its result considering every +component of the {\RDFM} graph ({\ie} every {\RDFM}) but we can constrain +some nodes of the inspected components to have (or not to have) a given value, +with the aim of improving the performance of the inspection procedure. +The constrained nodes are specified in the \TT{istrue} and \TT{istrue} +\EM{optional-clauses} and the constraining values are expressed by \TT{in} or +\TT{match} constructions depending on their semantics (constant values or +POSIX regular expressions respectively). +See for instance \figref{Property} (example 6). +Again a constrained node may be the value of a compound property, therefore +its specification (appearing in the \TT{istrue} or \TT{isfalse} clause) is +a path in the {\RDF} graph starting from the end-node of the head path. + +\TT{property} allows to access the {\RDFS} property hierarchy by specifying +a flag named \TT{sub} or \TT{super}. +If the \TT{sub} flag is present, \TT{property} inspects the instances of the +default tree (made by the head path and by the \EM{optional-clauses} paths) +and every other tree obtained by substituting an arc $ p $ with the arc of a +subproperty of $ p $. +If the \TT{super} flag is present, super-property arcs are substituted instead. + +\TT{property} also allows the inverse traversal of its head path if the +\TT{inverse} flag is specified. +In this case the operator works as follows: + +\begin{enumerate} + +\item +It instantiates the head path using as end-nodes the values whose main +component is specified in head strings of the source {\av} set. + +\item +It encodes the resources corresponding to the instances of the start-nodes into +{\av}'s assigning the attributes obtained instantiating the attribute paths% +\footnote{The path in \EM{optional-clauses} are never traversed backward.} +and composes these {\av}'s using the {\MathQL} set-theoretic union to build +the result set. + +\end{enumerate} + +See for instance \figref{Property} (example 7). + +Now we can present \emph{access relations} which are the formal tools used by +{\MathQL} semantics to access the {\RDF} graph. +An access relation is a set of triples $ (r_1, p, r_2) $ where $ r_1 $ and +$ r_2 $ are strings, $ p $ is a path (encoded as a list of strings). +Each triple is a sort of ``extended {\RDF} triple'' in the sense that $ r_1 $ +is is a resource for which metadata is provided, $ p $ is a path in the {\RDF} +graph and $ r_2 $ is the main value of the end-node of the instance of $ p $ +starting from $ r_1 $ (this includes the instances of sub- and super-arcs of +$ p $ if necessary). +See for instance \figref{Property} (example 8). + +{\MathQL} does not provide for any built-in access relation so any query +engine can freely define the access relations that are appropriate with +respect to the metadata it can access. +In particular, \secref{Interpreter} describes the access relations implemented +by the {\HELM} query engine. + +It is worth remarking, as it was already stressed in \cite{GS03, Gui03}, that +the concept of access relation corresponds to the abstract concept of +property in the basic {\RDF} data model which draws on well established +principles from various data representation communities. +In this sense an {\RDF} property can be thought of either as an attribute of a +resource (traditional attribute-value pairs model), or as a relation between +a resource and a value (entity-relationship model). +This observation leads us to conclude that {\MathQL} is sound and complete +with respect to querying an abstract {\RDF} metadata model. + +Finally note that access relations are close to {\RDF} entity-relationship +model, but they do not work if we allow paths with an arbitrary number of +loops ({\ie} with an arbitrary length) because this would lead to creating +infinite sets of triples. +If we want to handle this case, we need to turn these relations into +multivalued functions. diff --git a/helm/mathql/doc/mathql_introduction_textual.tex b/helm/mathql/doc/mathql_introduction_textual.tex new file mode 100644 index 000000000..dd225479c --- /dev/null +++ b/helm/mathql/doc/mathql_introduction_textual.tex @@ -0,0 +1,173 @@ +\subsection{Textual syntax} \label{Textual} + +The syntax of grammatical productions resembles BNF and POSIX notation: + +\begin{itemize} + +\item +\TT{::=} defines a grammatical production by means of a regular expression. + +Regular expressions are made of the following elements +(here \TT{...} is a placeholder): + +% \item +% \TT{.} represents any character between U 0020 and U 007F inclusive; + +\item +\TT{`...`} represents any character in a character set; + +\item +\verb+`^ ...`+ represents any character (U+0020 to U+007E) not in a character +set; + +\item +\TT{"..."} represents a string to be matched verbatim; + +\item +\GP{...} represents a regular expression defined by a grammatical production; + +\item +\TT{... ...} represents a conjunctive regular expression; + +\item +\TT{... | ...} represents a disjunctive regular expression; + +\item +\TT{[ ... ]?} represents an optional regular expression; + +\item +\TT{[ ... ]+} represents a regular expression to be repeated one or more times; + +\item +\TT{[ ... ]*} represents a regular expression to be repeated zero or more times; + +\item +\TT{[ ... ]} represents a grouped regular expression. + +\end{itemize} + +\noindent +{\MathQL} Expressions can contain quoted constant strings with the syntax of +\figref {StrTS}.% +\footnote{Note that the first slash of the \GP{path} is not optional as +in {\MathQL}.3.} + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} + ::= '0 - 9' + ::= [ ]* + ::= | 'A - F' | 'a - f' + ::= "u" | '"' | "\" | "^" + ::= '"' [ "\" "^" | '^ "\^' ]* '"' + ::= "/" | [ "/" ]+ +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{Textual syntax of numbers, strings and paths} \label{StrTS} +\end{figure} + +\noindent +The meaning of the escaped sequences is shown in \figref{EscTS} +(where $ .... $ is a 4-digit placeholder). + +\begin{figure}[ht] +\begin{footnotesize} +\begin{center} \begin{tabular}{|l|l|c|} +\hline {\bf Escape sequence} & {\bf Unicode character} & {\bf Text} \\ +\hline \verb+\u....^+ & U+.... & \\ +\hline \verb+\"^+ & U+0022 & \verb+"+ \\ +\hline \verb+\\^+ & U+005C & \verb+\+ \\ +\hline \verb+\^^+ & U+005E & \verb+^+ \\ +\hline +\end{tabular} \end{center} +\end{footnotesize} +\vskip-1pc +\caption{Textual syntax of escaped characters} \label{EscTS} +\end{figure} + +{\MathQL} character escaping syntax aims at complying with W3C character model +for the World Wide Web \cite{W3Ca} which recommends a support for standard +Unicode characters (U+0000 to U+FFFF) and escape sequences with +start/end delimiters. +In particular {\MathQL} escape delimiters (backslash and caret) are chosen +among the {\em unwise} characters for URI references (see \cite{URI}) because +URI references are the natural content of constant strings and these +characters should not be so frequent in them. + +Query expressions can contain variables for {\av}'s (production \GP{avar}) +and variables for {\av} sets, {\ie} for query results (production \GP{svar}) +according to the syntax of \figref{VarTS}. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} + ::= [ 'A - Z' | 'a - z' | `_` ]+ + ::= [ | ]* + ::= "@" + ::= "$" +\end{verbatim}\end{footnotesize} %$ +\vskip-1pc +\caption{Textual syntax of variables} \label{VarTS} +\end{figure} + +\noindent +The syntax of query expressions (production \GP{query}) is described in +\figref{QueryTS}. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} + ::= [ "inverse" ]? [ "sub" | "super" ]? +
::= [ "main" ]? + ::= [ "in" | "match" ] + ::= [ "istrue" [ "," ]* ]? + ::= [ "isfalse" [ "," ]* ]* + ::= [ "as" ]? + ::= [ "attr" [ "," ]* ]? + ::=
+ ::= [ "pattern" ]? + ::= [ [ "," ]* ]? + ::= "(" ")" | | "[" "]" + | "property" "of" + | "let" "=" "in" + | ";;" | | + | "ex" | "." + | "add" [ "distr" ]? [ | ] "in" + | "for" "in" [ "sup" | "inf" ] + | "while" [ "sup" | "inf" ] + | "{" "}" "{" "}" + | "gen" [ "{" "}" | "in" ] + ::= [ [ "," ]* ]? + ::= "=" + ::= "{" [ ";" ]* "}" + ::= [ "," ]* + ::= [ "attr" ]? + ::= [ [ ";" ]* ]? +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{Textual syntax of queries} \label{QueryTS} +\end{figure} + +\noindent +The syntax of result expressions (production \GP{avs}) is described in +\figref{ResultTS}. + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} + ::= "=" | "{" [ "," ]* "}" + ::= "{" [ ";" ]* "}" + ::= [ "attr" [ "," ]* ]? + ::= [ [ ";" ]* ]? +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{Textual syntax of results} \label{ResultTS} +\end{figure} + +\xcomment { + +\begin{figure}[ht] +\begin{footnotesize} \begin{verbatim} + | "select" "from" "where" +\end{verbatim} \end{footnotesize} +\vskip-1pc +\caption{Textual syntax of basic query extensions} \label{BasicTS} +\end{figure} + +} diff --git a/helm/mathql/doc/mathql_macros.sty b/helm/mathql/doc/mathql_macros.sty index a6701208e..01b563f8d 100644 --- a/helm/mathql/doc/mathql_macros.sty +++ b/helm/mathql/doc/mathql_macros.sty @@ -4,17 +4,19 @@ \newcommand{\subsecref}[1]{Subsection~\ref{#1}} \newcommand{\figref}[1]{Figure~\ref{#1}} +\newcommand\CAML{\textsc{caml}} +\newcommand\Galax{\textsc{galax}} +\newcommand\HELM{\textsc{helm}} \newcommand\MathQL{\textsc{mathql-1}} +\newcommand\MySQL{\textsc{mysql}} +\newcommand\POSIX{\textsc{posix}} +\newcommand\PostgreSQL{\textsc{postgresql}} \newcommand\RDF{\textsc{rdf}} +\newcommand\RDFM{\textsc{rdf model}} \newcommand\RDFS{\textsc{rdf schema}} -\newcommand\HELM{\textsc{helm}} -\newcommand\POSIX{\textsc{posix}} -\newcommand\XML{\textsc{xml}} -\newcommand\CAML{\textsc{caml}} \newcommand\SQL{\textsc{sql}} -\newcommand\MySQL{\textsc{mysql}} -\newcommand\PostgreSQL{\textsc{postgresql}} -\newcommand\Galax{\textsc{galax}} +\newcommand\URI{\textsc{uri}} +\newcommand\XML{\textsc{xml}} \newcommand\XQuery{\textsc{xquery}} \def\av{{\frenchspacing a.v.}} diff --git a/helm/mathql/doc/mathql_operational.tex b/helm/mathql/doc/mathql_operational.tex index f37225073..7868d3545 100644 --- a/helm/mathql/doc/mathql_operational.tex +++ b/helm/mathql/doc/mathql_operational.tex @@ -8,706 +8,7 @@ $ y \oft Y $ will denote a typing judgement. Note that this semantics is not meant as a formal system \emph{per se}, but should serve as a reference for implementors. -\subsection {Mathematical background} - -As a mathematical background for the semantics, we take the one presented in -\cite{Gui03}. - -{\Str} denotes the type of strings and its elements are the finite sequences -of Unicode \cite{Unicode} characters. -Grammatical productions, represented as strings in angle brackets, denote the -subtype of {\Str} containing the produced sequences of characters. - -{\Num} denotes the type of numbers and is the subtype of {\Str} defined by the -regular expression: \TT{'0 - 9' [ '0 - 9' ]*}. -In this type, numbers are represented by their decimal expansion. - -$ \Setof\ Y $ denotes the type of finite sets ({\ie} unordered finite -sequences without repetitions) over $ Y $. -$ \Listof\ Y $ denotes the type of lists ({\ie} ordered finite sequences) -over $ Y $. -We will use the notation $ [y_1, \cdots, y_m] $ for the list whose elements -are $ y_1, \cdots, y_m $. - -{\Boole}, the type of Boolean values, is defined as -$ \{\ES, \{("", \ES)\}\} \oft \Setof\ \Setof\ (\Str \times \Setof\ Y) $ -where the first element stands for \emph{false} (denoted by {\F}) and the -second element stands for \emph{true} (denoted by {\T}). - -$ Y \times Z $ denotes the product of the types $ Y $ and $ Z $ whose elements -are the ordered pairs $ (y, z) $ such that $ y \oft Y $ and $ z \oft Z $. -The notation is also extended to a ternary product. - -$ Y \to Z $ denotes the type of functions from $ Y $ to $ Z $ and $ f\ y $ -denotes the application of $ f \oft Y \to Z $ to $ y \oft Y $. -Relations over types, such as equality, are seen as functions to {\Boole}. - -With the above constructors we can give a formal meaning to most of the -standard notation. For instance we will use the following: - -\begin{itemize} - -\item -$ {\ES} \oft (\Setof\ Y) $ - -\item -$ {\lex} \oft ((\Setof\ Y) \to \Boole) \to \Boole $ - -\item -$ {\lall} \oft ((\Setof\ Y) \to \Boole) \to \Boole $ - -\item -$ {\in} \oft Y \to (\Setof\ Y) \to \Boole $ (infix) - -\item -$ {\sub} \oft (\Setof\ Y) \to (\Setof\ Y) \to \Boole $ (infix) - -\item -$ {\meet} \oft (\Setof\ Y) \to (\Setof\ Y) \to \Boole $ (infix) - -%\item -$ {\sand} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ (infix) - -\item -$ {\sor} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ (infix) - -\item -$ {\sdor} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ -(the disjoint union, infix) - -\item -$ \le \oft \Num \to \Num \to \Boole $ (infix) - -\item -$ < \oft \Num \to \Num \to \Boole $ (infix) - -\item -$ \# \oft (\Setof\ Y) \to \Num $ (the size operator) - -\item -$ \app \oft (\Listof\ Y) \to (\Listof\ Y) \to (\Listof\ Y) $ -(the concatenation, infix) - -\item -$ \lnot \oft \Boole \to \Boole $ - -\end{itemize} - -\noindent -Note that $ \lall $ and $ \lex $ are always decidable because the sets are -finite by definition. - -\noindent -$ U \meet W $ means $ (\lex u \in U)\ u \in W $ and expresses the fact that -$ U \sand W $ is inhabited as a primitive notion, {\ie} without mentioning -intersection and equality as for $ U \sand W \neq \ES $, which is equivalent -but may be implemented less efficiently in real cases% -\footnote{As for the Boolean condition $ \a \lor \b $ which may have a more -efficient implementation than $ \lnot(\lnot \a \land \lnot \b) $.}. - -$ U \meet W $ is a natural companion of $ U \sub W $ being its logical dual -(recall that $ U \sub W $ means $ (\lall u \in U)\ u \in W $) -and is already being used successfully in the context of a constructive -({\ie} intuitionistic and predicative) approach to point-free topology -\cite{Sam00}. - -Sets of couples play a central role in our formalization and in particular we -will use: - -\begin{itemize} - -\item -$ \Fst \oft (Y \times Z) \to Y $ such that $ \Fst\ (y, z) = y $. - -\item -$ \Snd \oft (Y \times Z) \to Z $ such that $ \Snd\ (y, z) = z $. - -\item -$ \Fsts \oft \Setof\ (Y \times Z) \to \Setof\ Z $ such that -$ \Fsts\ U = \{\Fst\ u \st u \in U\} $. - -\item -With the same notation, if $ W $ contains just one couple whose first -component is $ y $, then $ \get{W}{y} $ is the second component of that couple. -In the other cases $ \get{W}{y} $ is not defined. -This operator has type $ (\Setof\ (Y \times Z)) \to Y \to Z $. - -\item -Moreover $ \set{W}{y}{z} $ is the set obtained from $ W $ removing every -couple whose first component is $ y $ and adding the couple $ (y, z) $. -The type of this operator is \\ -$ (\Setof\ (Y \times Z)) \to Y \to Z \to (\Setof\ (Y \times Z)) $. - -\item -Also $ U + W $ is the union of two sets of couples in the following sense: - -\begin{footnotesize} -\begin{center} \begin{tabular}{rll} -% -$ U + \ES $ & rewrites to & $ U $ \\ -$ U + (W \sdor \{(y, z)\}) $ & rewrites to & $ \set{U}{y}{z} + W $ -% -\end{tabular} \end{center} -\end{footnotesize} - -\end{itemize} - -The last three operators are used to read, write and join association sets, -which are sets of couples such that the first components of two different -elements are always different. -These sets will be exploited to formalize the memories appearing in evaluation -contexts. - -\subsection {The core language} - -\subsubsection {Preliminaries} - -Wih the above background we are able to type the main objects needed in the -formalization: - -\begin{itemize} - -\item -A path $ s $ is a list of strings therefore its type is -$ T_{0a} = \Listof\ \Str $. - -\item -A multiple string value $ V $ is an object of type $ T_{0b} = \Setof\ \Str $. - -\item -A attribute group $ G $ is an association set connecting the attribute names -to their values, therefore its type is -$ T_1 = \Setof\ (T_{0a} \times T_{0b}) $. - -\item -A subject string $ r $ is an object of type $ \Str $. - -\item -A set $ A $ of attribute groups is an object of type $ T_2 = \Setof\ T_1 $. - -\item -An {\av} is a subject string with its attribute groups, so its type is -$ T_3 = \Str \times T_2 $. - -\item -A set $ S $ of {\av}'s is an association set of type $ T_4 = \Setof\ T_3 $. - -\item -A triple of an attributed relation is of type -$ T_5 = \Str \times \Str \times (T_{0a} \to \Str) $. - -\end{itemize} - -When a constant string appearing in a {\MathQL} expression is unquoted, the -surrounding double quotes are deleted and each escaped sequence is translated -according \figref{EscTS}. - -This operation is formally performed by the function -$ \Unquote $ of type $ \Str \to \Str $. -Moreover $ \Name \oft \GP{path} \to T_{0a} $ is a helper function that -converts a linearized path in its structured representation. -Formally $ \Name\ (\TT{/}q_1\TT{/} \cdots \TT{/}q_m) $ -rewrites to $ [\Unquote\ q_1, \cdots, \Unquote\ q_m] $. - -The semantics of {\MathQL} expressions denoting queries is given by the infix -relation $ \daq $ that evaluates a query to an {\av} set. -These expressions are evaluated in a context $ \G = \g $ -which is a triple of association sets that connect -svar's to {\av} sets, avar's to {\av}'s and avar's to attribute groups. -Therefore the type $ K $ of the context $ \G $ is: - -\begin{footnotesize} \begin{center} -$ -\Setof\ (\GP{svar} \times T_4) \times -\Setof\ (\GP{avar} \times T_3)\ \times % $ \\ $ \times\ -\Setof\ (\GP{avar} \times T_1) -$ -\end{center} \end{footnotesize} - -\noindent -and the evaluating relation is of the following type: - -\begin{footnotesize} -\begin{center} \begin{tabular}{l} -$ \mathord{\daq} \oft (K \times \GP{query}) \times (K \times T_4) \to \Boole $. -\end{tabular} \end{center} -\end{footnotesize} - -The context components $ \G_s $ and $ \G_a $ are used to store the contents of -variables, while $ \G_g $ is used by the \TT{ex} operator to be presented -below. - -In the following we will write $ (\G, x) \daq S $ to abbreviate -$ (\G, x) \daq (\G, S) $. - -The semantics of {\MathQL} expressions denoting results is given by the infix -relation $ \dar \oft \GP{avs} \times T_4 \to \Boole $ that evaluates a result -to an {\av} set. - -\subsubsection{Queries} - -The first \GP{query} expressions include explicit {\av} sets and syntactic -grouping: - -\begin{itemize} - -\item -The syntactic grouping is obtained enclosing a \GP{query} between \TT{(} -and \TT{)}. -An explicit {\av} set can be represented by a single string, which is -converted into a single {\av} with no attributes, or by a \GP{xavs} -(extended {\av} set) expression enclosed between \TT{[} and \TT{]}. -Such an expression describes all the components of an {\av} set and is -evaluated using the rules given below. - -\begin{footnotesize} -\begin{center} -% -\irule{(\G, x) \daq S}{}{(\G, (x)) \daq S} \spc -\irule{q \oft \GP{string}}{}{(\G, q) \daq \{(\Unquote\ q, \ES)\}} -% -\end{center} -\begin{center} -% -\irule{x_1, \cdots, x_m \in \GP{xav} \spc - (\G, TT{[} x_1 \TT{]}) \daq S_1 \spc \cdots \spc (\G, \TT{[} x_m \TT{]}) \daq S_m}{} - {(\G, \TT{[} x_1 \TT{;} \cdots \TT{;} x_m \TT{]}) \daq S_1 \sum \cdots \sum S_m} -% -\end{center} -\begin{center} -% -\irule{q \in \GP{string} \spc g_1, \cdots, g_m \in \GP{xgroup} \spc - (\G, \TT{[} q\ \TT{attr}\ g_1 \TT{]}) \daq S_1 \spc \cdots \spc - (\G, \TT{[} q\ \TT{attr}\ g_m \TT{]}) \daq S_m}{} - {(\G, \TT{[} q\ \TT{attr}\ g_1 \TT{,} \cdots \TT{,} g_m \TT{]}) \daq S_1 \sum \cdots \sum S_m} -% -\end{center} -\begin{center} -% -\irule{q \in \GP{string} \spc a_1, \cdots, a_m \in \GP{xatr} \spc - (\G, \TT{[} q\ \TT{attr}\ \{ a_1 \} \TT{]}) \daq S_1 \spc \cdots \spc - (\G, \TT{[} q\ \TT{attr}\ \{ a_m \} \TT{]}) \daq S_m}{} - {(\G, \TT{[} q\ \TT{attr}\ \{ a_1 \TT{;} \cdots \TT{;} a_m \} \TT{]}) \daq S_1 \dsum \cdots \dsum S_m} -% -\end{center} -\begin{center} -% -\irule{q \in \GP{string} \spc p \in \GP{path} \spc x \daq S}{} - {(\G, \TT{[} q\ \TT{attr}\ \{ p = x \} \TT{]}) \daq - \{(\Unquote\ q, \{ \{ (\Name\ p, \Fsts\ S) \} \})\}} -% -\end{center} -\end{footnotesize} - -$ \dsum $ and $ \sum $ are helper functions describing the two union operations -on {\av} sets: with and without attribute distribution respectively. -$ \dsum $ and $ \sum $ have two rewrite rules each. - -\begin{footnotesize} -\begin{center} \begin{tabular}{lrll} -% -1a & -$ (S_1 \sdor \{(r, A_1)\}) \sum (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & -$ (S_1 \sum S_2) \sor \{(r, A_1 \sor A_2)\} $ \\ -1b & $ S_1 \sum S_2 $ & rewrites to & $ S_1 \sor S_2 $ \\ -2a & -$ (S_1 \sdor \{(r, A_1)\}) \dsum (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & -$ (S_1 \dsum S_2) \sor \{(r, A_1 \distr A_2)\} $ \\ -2b & $ S_1 \dsum S_2 $ & rewrites to & $ S_1 \sor S_2 $ -% -\end{tabular} \end{center} -\end{footnotesize} - -Rules 1a, 2a override 1b, 2b respectively and -$ A_1 \distr A_2 = \{G_1 \sum G_2 \st G_1 \in A_1, G_2 \in A_2\} $. - -\item -The semantics of \TT{property} operator is described below. - -In the following rule, -$s$ is ``$ \TT{property}\ h\ p_1\ \TT{main}\ p_2\ \RM{attr}\ e_1, \cdots, -e_m\ \TT{in}\ k\ x $'', $P$ is $ \Property\ h $ and -$A_2$ is $ \{ \Exp\ P\ p_1\ r_1\ \{e_1, \cdots, e_m\}\} $: - -\begin{footnotesize} -\begin{center} -% -\irule -{h \oft \GP{refine} \spc p_1, p_2 \oft \GP{path} \spc - e_1, \cdots, e_m \oft \GP{exp} \spc k \in \TT{["pattern"]?} \spc - (\G, x) \daq S -}{A} -{(\G, s) \daq \bigsum \{ \{(r_2, A_2)\} \st -(\lex r_1 \in \Src\ k\ P\ (\Fsts\ S))\ -(r_1, p_1 \app p_2, r_2) \in P -\}} -% -\end{center} -\end{footnotesize} - -When the \TT{main} clause is not present, we assume $ p_2 = \TT{/} $. - -Here $ \Property\ h $ gives the appropriate access relation according to -the $ h $ flag (this is the primitive function that inspects the {\RDF} graph, -see \subsecref{}). - -$ \Src\ k\ P\ V $ is a helper function giving the source set -according to the $ k $ flag. $ \Src $ is based on $ \Match $, the helper -function handling POSIX regular expressions. Formally: - -\begin{footnotesize} -\begin{center} \begin{tabular}{rll} -% -$ \Src\ \TT{""}\ P\ V $ & rewrites to & $ V $ \\ -$ \Src\ \TT{"pattern"}\ P\ V $ & rewrites to & -$ \Match\ \{r_1 \st (\lex p, r2)\ (r_1, p, r_2) \in P\} $\ V \\ -$ \Match\ W\ V $ & rewrites to & $ \bigsor \{\Pattern\ W\ s \st s \in V\} $ -% -\end{tabular} \end{center} -\end{footnotesize} - -Here $ \Pattern\ W\ s $ is the primitive function returning the subset of -$ W \oft \Setof\ \Str $ whose element match the POSIX 1003.2-1992% -\footnote{Included in POSIX 1003.1-2001: -\URI{http://www.unix-systems.org/version3/ieee\_\,std.html}.} -regular expression $ \verb+"^"+ \app s \app \TT{"\$"} $. - -$ \Exp\ P\ \p_1\ r_1\ E $ is the helper function that builds the group of -attributes specified in the \TT{attr} clause. -$ \Exp $ is based on $ \Exp\p $ which handles a single attribute. Formally: - -\begin{footnotesize} -\begin{center} \begin{tabular}{rlll} -% -$ f\ P\ r_1\ p_1\ p $ & rewrites to & -$ \{ r_2 \st (r_1, p_1 \app (\Name\ p), r_2) \in P \} $ & -with $ p \oft \GP{path} $ \\ -$ \Exp\p\ P\ r_1\ p_1\ p $ & rewrites to & -$ \{ (\Name\ p, f\ P\ r_1\ p_1\ p) \} $ & -with $ p \oft \GP{path} $ \\ -$ \Exp\p\ P\ r_1\ p_1\ (p\ \TT{as}\ p\p) $ & rewrites to & -$ \{ (\Name\ p\p, f\ P\ r_1\ p_1\ p) \} $ & -with $ p, p\p \oft \GP{path} $ \\ -$ \Exp\ P\ r_1\ p_1\ E $ & rewrites to & -$ \bigsum \{ \Exp\p\ P\ r_1\ p_1\ e \st e \in E \} $ & -with $ E \oft \Setof\ \GP{exp} $ -\end{tabular} \end{center} -\end{footnotesize} - -When $ c_1, \cdots, c_n \oft \GP{cons} $ and the clause -``\TT{istrue} $ c_1, \cdots, c_n $'' is present, the set $ P $ must be replaced -with $ \{ (r_1, p, r_2) \in P \st \Istrue\ P\ r_1\ p_1\ C \} $ -where $ C $ is $ \{c_1, \cdots, c_n\} $ and $ \Istrue $ is a helper function -that checks the constraints in $ C $. -$ \Istrue $ is based on $ \Istrue\p $ that handles a single constraint. -Formally, if $ p \oft \GP{path} $ and $ (\G, x) \daq S $: - -\begin{footnotesize} -\begin{center} \begin{tabular}{rll} -% -$ g\ P\ p_1\ p $ & rewrites to & -$ \{ r_2 \st (\lex r_1)\ (r_1, p_1 \app (\Name\ p), r_2) \in P \} $ \\ -$ \Istrue\p\ P\ r_1\ p_1\ (p\ \TT{in}\ x) $ & rewrites to & -$ (f\ P\ r_1\ p_1\ p) \meet \Fsts\ S $ \\ -$ \Istrue\p\ P\ r_1\ p_1\ (p\ \TT{match}\ x) $ & rewrites to & -$ (f\ P\ r_1\ p_1\ p) \meet \Match\ (g\ P\ p_1\ p)\ (\Fsts\ S) $ \\ -$ \Istrue\ P\ r_1\ p_1\ C $ & rewrites to & -$ (\lall c \in W)\ \Istrue\p\ P\ r_1\ p_1\ c $ -% -\end{tabular} \end{center} -\end{footnotesize} - -For each clause ``\TT{isfalse} $ c_1, \cdots, c_n $'' the set $ P $ -must be replaced with -$ \{ (r_1, p, r_2) \in P \st \lnot (\Istrue\ P\ r_1\ p_1\ C) \} $ -(using the above notation). -Note that these substitutions and the former must be composed if necessary. - -If the \TT{inverse} flag is present, also replace the instances of $ P $ in -the rule $A$ and in the definition of $ \Src $ with -$ \{ (r_2, p, r_1) \st (r_1, p, r_2) \in P \} $. - -\end{itemize} - -The second group of \GP{query} expressions includes the context manipulation -facilities: - -\begin{itemize} - -\item -The operators for reading variables: - -\begin{footnotesize} \begin{center} -% -\irule{i \oft \GP{svar}}{}{(\g, i) \daq \get{\G_s}{i}} \spc -\irule{i \oft \GP{avar}}{}{(\g, i) \daq \{\get{\G_a}{i}\}} -% -\end{center} \end{footnotesize} - -$ \get{\G_s}{i} $ and $ \{\get{\G_a}{i}\} $ mean $ \ES $ if $ i $ is not defined. - -\item -The \TT{let} operator assigns an {\av} set variable (svar): - -\begin{footnotesize} -\begin{center} -% -\irule{i \oft \GP{svar} \spc (\G_1, x_1) \daq (\g, S_1) \spc - ((\set{\G_s}{i}{S_1}, \G_a, \G_g), x_2) \daq (\G_2, S_2)} -{}{(\G_1, \TT{let}\ i\ \TT{=}\ x_1\ \TT{in}\ x_2) \daq (\G_2, S_2)} -% -\end{center} -\end{footnotesize} - -The sequential composition operator \TT{;;} has the semantics of a \TT{let} -introducing a fresh variable, so ``$ x_1\ \TT{;;}\ x_2 $'' revrites -to ``$ \TT{let}\ i\ \TT{=}\ x_1\ \TT{in}\ x_2 $'' where $i$ does not occur in -$x_2$. - -\item -The \TT{ex} and ``dot'' operators provide a way to read the attributes stored -in avar's. - -The \TT{ex} (exists) operator gives access to the groups of attributes -associated to the {\av}'s in the $ \G_a $ part of the context and does -this by loading its $ \G_g $ part, which is used by the ``dot'' operator -described below. - -\TT{ex} is true if the query following it is successful for at least one -pool of attribute groups, one for each {\av} in the $ \G_a $ part of the -context. Formally we have the rules: - -\begin{footnotesize} -\begin{center} -% -\irule{(\lall \D_g \in \All\ \G_a)\ ((\G_s, \G_a, \G_g + \D_g), y) \daq \F} - {1}{(\G, \TT{ex}\ y) \daq \F} \spc -\irule{\Nop}{2}{(\G, \TT{ex}\ y) \daq \T} \spc -% -\end{center} -\begin{center} -% -\irule {i \oft \GP{avar} \spc p \oft \GP{path} \spc \get{\get{\G_g}{i}}{\Name\ p} = \{s_1, \cdots, s_m\}}{} - {(\G, i\TT{.}p) \daq \{(s_1, \ES), \cdots, (s_m, \ES)\}} -% -\end{center} -\end{footnotesize} - -where% -\footnote{$\D_g$ has the type of $ \G_g $.} -$ \All\ \G_a = \{\D_g \st \get{\D_g}{i} = G\ \RM{iff}\ G \in \Snd\ \get{\G_a}{i} \} $, -and $ \G = \g $. - -Moreover $ \get{\get{\G_g}{i}}{\Name\ p} $ means $ \ES $ -if $ i $ or $ \Name\ p $ are not defined where appropriate. - -Here the first rule has higher precedence than the second one does. - -\end{itemize} - -The third group of \GP{query} expressions includes the {\av} set manipulation -facilities: - -\begin{itemize} - -\item -The \TT{add} operator adds a given set of attribute groups to the {\av}'s -of an {\av} set using a union with or without attribute distribution -according to the \TT{distr} flag. - -\begin{footnotesize} -\begin{center} -% -\irule -{h \in \TT{["distr"]?} \spc a \in \GP{xgroups} \spc - (\G, \TT{[} ""\ \TT{attr}\ a \TT{]}) \daq \{("", A)\} \spc - (\G, x) \daq \{(r_1, A_1), \cdots, (r_m, A_m)\}}{} -{(\G, \TT{add}\ a\ \TT{in}\ x) \daq \{(r_1, A_1 \jolly A), \cdots, (r_m, A_m \jolly A)\}} -% -\end{center} -\begin{center} -% -\irule -{h \in \TT{["distr"]?} \spc i \in \GP{avar} \spc - (\g, x) \daq \{(r_1, A_1), \cdots, (r_m, A_m)\}}{} -{(\g, \TT{add}\ i\ \TT{in}\ x) \daq \{(r_1, A_1 \jolly \Snd\ \get{\G_a}{i}), \cdots, (r_m, A_m \jolly \Snd\ \get{\G_a}{i})\}} -% -\end{center} -\end{footnotesize} - -Where $ \jolly_{\tt""} = \sor $ and $ \jolly_{\tt"distr"} = \distr $. -Moreover $ \Snd\ \get{\G_a}{i} = \ES $ if $i$ is not defined. - -\item -The semantics of the \TT{for} operator is given in terms of the {\For} helper -function: - -\begin{footnotesize} -\begin{center} -% -\irule{i \oft \GP{avar} \spc (\G, x_1) \daq (\G_1, S_1) \spc h \in \TT{["sup"|"inf"]}} -{}{(\G, \TT{for}\ i\ \TT{in}\ x_1\ h\ x_2) \daq \For\ h\ \G_1\ i\ x_2\ S_1} \spc -\irule{i \oft \GP{avar} \spc x_2 \oft \GP{query}}{} - {\For\ h\ \G\ i\ x_2\ \ES\ \RM{rewrites to}\ (\G, \ES)} -% -\end{center} -\begin{center} -% -\irule{i \oft \GP{avar} \spc ((\G_s, \set{\G_a}{i}{R}, \G_g), x_2) \daq (\G_2, S_2)} - {}{\For\ h\ \G\ i\ x_2\ (S_1 \sdor \{R\})\ \RM{rewrites to}\ - (\G_2 ,(\Snd\ (\For\ h\ \G_2\ i\ x_2\ S_1)) \jolly_h S_2)} -% -\end{center} -\end{footnotesize} - -Here we have $ R \oft T_3 $, $ \G = \g $, $ \jolly_{\tt"sup"} = \sum $ and -$ \jolly_{\tt"inf"} = \prod $. - -$ \dprod $ and $ \prod $ are helper functions describing the two intersection -operations on {\av} sets: with and without attribute distribution respectively. -They are dual to $ \dsum $ and $ \sum $. $ \dprod $ does not appear in this -version of {\MathQL} but was used in the erlier versions -\cite{Lor02, GS03, Gui03}. - -\begin{footnotesize} -\begin{center} \begin{tabular}{lrll} -% -1a & -$ (S_1 \sdor \{(r, A_1)\}) \prod (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & -$ (S_1 \prod S_2) \sor \{(r, A_1 \sor A_2)\} $ \\ -1b & $ S_1 \prod S_2 $ & rewrites to & $ \ES $ \\ -2a & -$ (S_1 \sdor \{(r, A_1)\}) \dprod (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & -$ (S_1 \dprod S_2) \sor \{(r, A_1 \distr A_2)\} $ \\ -2b & $ S_1 \dprod S_2 $ & rewrites to & $ \ES $ -% -\end{tabular} \end{center} -\end{footnotesize} - -As for $ \sum $ and $ \dsum $, rules 1a, 2a override rules 1b, 2b respectively. - -\item -The semantics of the \TT{while} operator is given by the rules below: - -\begin{footnotesize} -\begin{center} -% -\irule{h \in \TT{["sup"|"inf"]} \spc (\G, x_1) \daq (\G_1, \ES)}{1} -{(\G, \TT{while}\ x_1\ h\ x_2) \daq (\G_1, \ES)} -% -\end{center} -\begin{center} -% -\irule -{h \in \TT{["sup"|"inf"]} \spc (\G, x_1) \daq (\G_1, S_1) \spc - (\G_1, x_2) \daq (\G_2, S_2) \spc - (\G_2, \TT{while}\ x_1\ h\ x_2) \daq (\G_3, S)}{2} -{(\G, \TT{while}\ x_1\ h\ x_2) \daq (\G_3, S_2 \jolly_h S)} -% -\end{center} -\end{footnotesize} - -Again $ \jolly_{\tt"sup"} = \sum $ and $ \jolly_{\tt"inf"} = \prod $. -Moreover rule 1 takes precedence over rule 2. - -\end{itemize} - -The forth group of \GP{query} constructions make {\MathQL} an extensible -language. - -\begin{itemize} - -\item -The ``function'' construction invokes an external function returning an {\av} -set. The function is identified by a \GP{path} and its arguments are a set of -\GP{path}'s and a set of \GP{query}'s. It is a mistake to invoke a function -with the wrong number of \GP{path}'s and \GP{query}'s as input (each -particular function defines these numbers independently). - -\begin{footnotesize} -\begin{center} -% -\irule -{p, p_1, \cdots, p_m \in \GP{path} \spc x_1, \cdots, x_n \in \GP{query}}{} -{(\G, p\ \{p_1 \TT{,} \cdots \TT{,} p_m\}\ \{x_1 \TT{,} \cdots \TT{,} x_m\}) - \daq \Fun\ p\ [p_1, \cdots, p_m]\ [x_1, \cdots, x_n]\ \G} -% -\end{center} -\end{footnotesize} - -where $ \Fun \oft \GP{path} \times (\Listof\ \GP{path}) \times (\Listof\ -\GP{query}) \times K \to T_4 $ is the primitive function performing the -low level invocation. -The core language does not include any external function and it is a mistake -to invoke an undefined function. - -\item -The \TT{gen} construction invokes an external function returning a \GP{query} -The function is identified by a \GP{path} and its arguments are a set of -\GP{query}'s. It is a mistake to invoke a function with the wrong number of -\GP{query}'s as input (each particular function defines this number -independently). - -\begin{footnotesize} -\begin{center} -% -\irule -{p \in \GP{path} \spc x_1, \cdots, x_n \in \GP{query} \spc - (\G, \Gen\ p\ [x_1, \cdots, x_n]\ \G) \daq (\G\p, S)}{} -{(\G, \TT{gen}\ p\ \{x_1 \TT{,} \cdots \TT{,} x_m\}) \daq (\G\p, S)} -% -\end{center} -\end{footnotesize} - -where $ \Gen \oft \GP{path} \times (\Listof\ \GP{query}) \times K \to -\GP{query} $ is the primitive function performing the low level invocation. -The core language does not include any external function of this kind and it -is a mistake to invoke an undefined function. - -The construction ``\TT{gen} p \TT{in} x'' rewrites to ``\TT{gen} p \{x\}'' -for the user's convenience. - -\end{itemize} - -\subsubsection {Results} - -An \GP{avs} expression ({\ie} the explicit representation of an {\av} set that -can denote a query result) is evaluated to an {\av} set according to the -following rules. - -\begin{footnotesize} -\begin{center} -% -\irule{x_1, \cdots, x_m \in \GP{av} \spc - x_1 \dar S_1 \spc \cdots \spc x_m \dar S_m}{} - {x_1 \TT{;} \cdots \TT{;} x_m \dar S_1 \sum \cdots \sum S_m} -% -\end{center} -\begin{center} -% -\irule{q \in \GP{string} \spc g_1, \cdots, g_m \in \GP{group} \spc - q\ \TT{attr}\ g_1 \dar S_1 \spc \cdots \spc - q\ \TT{attr}\ g_m \dar S_m}{} - {q\ \TT{attr}\ g_1 \TT{,} \cdots \TT{,} g_m \dar S_1 \sum \cdots \sum S_m} -% -\end{center} -\begin{center} -% -\irule{q \in \GP{string} \spc a_1, \cdots, a_m \in \GP{atr} \spc - q\ \TT{attr}\ \{ a_1 \} \dar S_1 \spc \cdots \spc - q\ \TT{attr}\ \{ a_m \} \dar S_m}{} - {(\G, q\ \TT{attr}\ \{ a_1 \TT{;} \cdots \TT{;} a_m \} \dar S_1 \dsum \cdots \dsum S_m} -% -\end{center} -\begin{center} -% -\irule{q, q_1, \cdots, q_m \in \GP{string} \spc p \in \GP{path}}{} - {q\ \TT{attr}\ \{ p = \{ q_1 \TT{,} \cdots \TT{,} q_m \} \} \dar - \{(\Unquote\ q, \{ \{ (\Name\ p, \{ \Unquote\ q_1, \cdots, \Unquote\ q_m \}) \} \})\}} -% -\end{center} -\end{footnotesize} - - -\subsection {The basic library} +\input{mathql_operational_background} +\input{mathql_operational_core} +\input{mathql_operational_library} diff --git a/helm/mathql/doc/mathql_operational_background.tex b/helm/mathql/doc/mathql_operational_background.tex new file mode 100644 index 000000000..0c831f812 --- /dev/null +++ b/helm/mathql/doc/mathql_operational_background.tex @@ -0,0 +1,149 @@ +\subsection {Mathematical background} + +As a mathematical background for the semantics, we take the one presented in +\cite{Gui03}. + +{\Str} denotes the type of strings and its elements are the finite sequences +of Unicode \cite{Unicode} characters. +Grammatical productions, represented as strings in angle brackets, denote the +subtype of {\Str} containing the produced sequences of characters. + +{\Num} denotes the type of numbers and is the subtype of {\Str} defined by the +regular expression: \TT{'0 - 9' [ '0 - 9' ]*}. +In this type, numbers are represented by their decimal expansion. + +$ \Setof\ Y $ denotes the type of finite sets ({\ie} unordered finite +sequences without repetitions) over $ Y $. +$ \Listof\ Y $ denotes the type of lists ({\ie} ordered finite sequences) +over $ Y $. +We will use the notation $ [y_1, \cdots, y_m] $ for the list whose elements +are $ y_1, \cdots, y_m $. + +{\Boole}, the type of Boolean values, is defined as +$ \{\ES, \{("", \ES)\}\} \oft \Setof\ \Setof\ (\Str \times \Setof\ Y) $ +where the first element stands for \emph{false} (denoted by {\F}) and the +second element stands for \emph{true} (denoted by {\T}). + +$ Y \times Z $ denotes the product of the types $ Y $ and $ Z $ whose elements +are the ordered pairs $ (y, z) $ such that $ y \oft Y $ and $ z \oft Z $. +The notation is also extended to a ternary product. + +$ Y \to Z $ denotes the type of functions from $ Y $ to $ Z $ and $ f\ y $ +denotes the application of $ f \oft Y \to Z $ to $ y \oft Y $. +Relations over types, such as equality, are seen as functions to {\Boole}. + +With the above constructors we can give a formal meaning to most of the +standard notation. For instance we will use the following: + +\begin{itemize} + +\item +$ {\ES} \oft (\Setof\ Y) $ + +\item +$ {\lex} \oft ((\Setof\ Y) \to \Boole) \to \Boole $ + +\item +$ {\lall} \oft ((\Setof\ Y) \to \Boole) \to \Boole $ + +\item +$ {\in} \oft Y \to (\Setof\ Y) \to \Boole $ (infix) + +\item +$ {\sub} \oft (\Setof\ Y) \to (\Setof\ Y) \to \Boole $ (infix) + +\item +$ {\meet} \oft (\Setof\ Y) \to (\Setof\ Y) \to \Boole $ (infix) + +%\item +$ {\sand} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ (infix) + +\item +$ {\sor} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ (infix) + +\item +$ {\sdor} \oft (\Setof\ Y) \to (\Setof\ Y) \to (\Setof\ Y) $ +(the disjoint union, infix) + +\item +$ \le \oft \Num \to \Num \to \Boole $ (infix) + +\item +$ < \oft \Num \to \Num \to \Boole $ (infix) + +\item +$ \# \oft (\Setof\ Y) \to \Num $ (the size operator) + +\item +$ \app \oft (\Listof\ Y) \to (\Listof\ Y) \to (\Listof\ Y) $ +(the concatenation, infix) + +\item +$ \lnot \oft \Boole \to \Boole $ + +\end{itemize} + +\noindent +Note that $ \lall $ and $ \lex $ are always decidable because the sets are +finite by definition. + +\noindent +$ U \meet W $ means $ (\lex u \in U)\ u \in W $ and expresses the fact that +$ U \sand W $ is inhabited as a primitive notion, {\ie} without mentioning +intersection and equality as for $ U \sand W \neq \ES $, which is equivalent +but may be implemented less efficiently in real cases% +\footnote{As for the Boolean condition $ \a \lor \b $ which may have a more +efficient implementation than $ \lnot(\lnot \a \land \lnot \b) $.}. + +$ U \meet W $ is a natural companion of $ U \sub W $ being its logical dual +(recall that $ U \sub W $ means $ (\lall u \in U)\ u \in W $) +and is already being used successfully in the context of a constructive +({\ie} intuitionistic and predicative) approach to point-free topology +\cite{Sam00}. + +Sets of couples play a central role in our formalization and in particular we +will use: + +\begin{itemize} + +\item +$ \Fst \oft (Y \times Z) \to Y $ such that $ \Fst\ (y, z) = y $. + +\item +$ \Snd \oft (Y \times Z) \to Z $ such that $ \Snd\ (y, z) = z $. + +\item +$ \Fsts \oft \Setof\ (Y \times Z) \to \Setof\ Z $ such that +$ \Fsts\ U = \{\Fst\ u \st u \in U\} $. + +\item +With the same notation, if $ W $ contains just one couple whose first +component is $ y $, then $ \get{W}{y} $ is the second component of that couple. +In the other cases $ \get{W}{y} $ is not defined. +This operator has type $ (\Setof\ (Y \times Z)) \to Y \to Z $. + +\item +Moreover $ \set{W}{y}{z} $ is the set obtained from $ W $ removing every +couple whose first component is $ y $ and adding the couple $ (y, z) $. +The type of this operator is \\ +$ (\Setof\ (Y \times Z)) \to Y \to Z \to (\Setof\ (Y \times Z)) $. + +\item +Also $ U + W $ is the union of two sets of couples in the following sense: + +\begin{footnotesize} +\begin{center} \begin{tabular}{rll} +% +$ U + \ES $ & rewrites to & $ U $ \\ +$ U + (W \sdor \{(y, z)\}) $ & rewrites to & $ \set{U}{y}{z} + W $ +% +\end{tabular} \end{center} +\end{footnotesize} + +\end{itemize} + +The last three operators are used to read, write and join association sets, +which are sets of couples such that the first components of two different +elements are always different. +These sets will be exploited to formalize the memories appearing in evaluation +contexts. diff --git a/helm/mathql/doc/mathql_operational_core.tex b/helm/mathql/doc/mathql_operational_core.tex new file mode 100644 index 000000000..f9471292b --- /dev/null +++ b/helm/mathql/doc/mathql_operational_core.tex @@ -0,0 +1,549 @@ +\subsection {The core language} + +\subsubsection*{Preliminaries} + +Wih the above background we are able to type the main objects needed in the +formalization: + +\begin{itemize} + +\item +A path $ s $ is a list of strings therefore its type is +$ T_{0a} = \Listof\ \Str $. + +\item +A multiple string value $ V $ is an object of type $ T_{0b} = \Setof\ \Str $. + +\item +A attribute group $ G $ is an association set connecting the attribute names +to their values, therefore its type is +$ T_1 = \Setof\ (T_{0a} \times T_{0b}) $. + +\item +A subject string $ r $ is an object of type $ \Str $. + +\item +A set $ A $ of attribute groups is an object of type $ T_2 = \Setof\ T_1 $. + +\item +An {\av} is a subject string with its attribute groups, so its type is +$ T_3 = \Str \times T_2 $. + +\item +A set $ S $ of {\av}'s is an association set of type $ T_4 = \Setof\ T_3 $. + +\item +A triple of an attributed relation is of type +$ T_5 = \Str \times \Str \times (T_{0a} \to \Str) $. + +\end{itemize} + +When a constant string appearing in a {\MathQL} expression is unquoted, the +surrounding double quotes are deleted and each escaped sequence is translated +according \figref{EscTS}. + +This operation is formally performed by the function +$ \Unquote $ of type $ \Str \to \Str $. +Moreover $ \Name \oft \GP{path} \to T_{0a} $ is a helper function that +converts a linearized path in its structured representation. +Formally $ \Name\ (\TT{/}q_1\TT{/} \cdots \TT{/}q_m) $ +rewrites to $ [\Unquote\ q_1, \cdots, \Unquote\ q_m] $. + +The semantics of {\MathQL} expressions denoting queries is given by the infix +relation $ \daq $ that evaluates a query to an {\av} set. +These expressions are evaluated in a context $ \G = \g $ +which is a triple of association sets that connect +svar's to {\av} sets, avar's to {\av}'s and avar's to attribute groups. +Therefore the type $ K $ of the context $ \G $ is: + +\begin{footnotesize} \begin{center} +$ +\Setof\ (\GP{svar} \times T_4) \times +\Setof\ (\GP{avar} \times T_3)\ \times % $ \\ $ \times\ +\Setof\ (\GP{avar} \times T_1) +$ +\end{center} \end{footnotesize} + +\noindent +and the evaluating relation is of the following type: + +\begin{footnotesize} +\begin{center} \begin{tabular}{l} +$ \mathord{\daq} \oft (K \times \GP{query}) \times (K \times T_4) \to \Boole $. +\end{tabular} \end{center} +\end{footnotesize} + +The context components $ \G_s $ and $ \G_a $ are used to store the contents of +variables, while $ \G_g $ is used by the \TT{ex} operator to be presented +below. + +In the following we will write $ (\G, x) \daq S $ to abbreviate +$ (\G, x) \daq (\G, S) $. + +The semantics of {\MathQL} expressions denoting results is given by the infix +relation $ \dar \oft \GP{avs} \times T_4 \to \Boole $ that evaluates a result +to an {\av} set. + +\subsubsection*{Queries} + +The first \GP{query} expressions include explicit {\av} sets and syntactic +grouping: + +\begin{itemize} + +\item +The syntactic grouping is obtained enclosing a \GP{query} between \TT{(} +and \TT{)}. +An explicit {\av} set can be represented by a single string, which is +converted into a single {\av} with no attributes, or by a \GP{xavs} +(extended {\av} set) expression enclosed between \TT{[} and \TT{]}. +Such an expression describes all the components of an {\av} set and is +evaluated using the rules given below. + +\begin{footnotesize} +\begin{center} +% +\irule{(\G, x) \daq S}{}{(\G, (x)) \daq S} \spc +\irule{q \oft \GP{string}}{}{(\G, q) \daq \{(\Unquote\ q, \ES)\}} +% +\end{center} +\begin{center} +% +\irule{x_1, \cdots, x_m \in \GP{xav} \spc + (\G, TT{[} x_1 \TT{]}) \daq S_1 \spc \cdots \spc (\G, \TT{[} x_m \TT{]}) \daq S_m}{} + {(\G, \TT{[} x_1 \TT{;} \cdots \TT{;} x_m \TT{]}) \daq S_1 \sum \cdots \sum S_m} +% +\end{center} +\begin{center} +% +\irule{q \in \GP{string} \spc g_1, \cdots, g_m \in \GP{xgroup} \spc + (\G, \TT{[} q\ \TT{attr}\ g_1 \TT{]}) \daq S_1 \spc \cdots \spc + (\G, \TT{[} q\ \TT{attr}\ g_m \TT{]}) \daq S_m}{} + {(\G, \TT{[} q\ \TT{attr}\ g_1 \TT{,} \cdots \TT{,} g_m \TT{]}) \daq S_1 \sum \cdots \sum S_m} +% +\end{center} +\begin{center} +% +\irule{q \in \GP{string} \spc a_1, \cdots, a_m \in \GP{xatr} \spc + (\G, \TT{[} q\ \TT{attr}\ \{ a_1 \} \TT{]}) \daq S_1 \spc \cdots \spc + (\G, \TT{[} q\ \TT{attr}\ \{ a_m \} \TT{]}) \daq S_m}{} + {(\G, \TT{[} q\ \TT{attr}\ \{ a_1 \TT{;} \cdots \TT{;} a_m \} \TT{]}) \daq S_1 \dsum \cdots \dsum S_m} +% +\end{center} +\begin{center} +% +\irule{q \in \GP{string} \spc p \in \GP{path} \spc x \daq S}{} + {(\G, \TT{[} q\ \TT{attr}\ \{ p = x \} \TT{]}) \daq + \{(\Unquote\ q, \{ \{ (\Name\ p, \Fsts\ S) \} \})\}} +% +\end{center} +\end{footnotesize} + +$ \dsum $ and $ \sum $ are helper functions describing the two union operations +on {\av} sets: with and without attribute distribution respectively. +$ \dsum $ and $ \sum $ have two rewrite rules each. + +\begin{footnotesize} +\begin{center} \begin{tabular}{lrll} +% +1a & +$ (S_1 \sdor \{(r, A_1)\}) \sum (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & +$ (S_1 \sum S_2) \sor \{(r, A_1 \sor A_2)\} $ \\ +1b & $ S_1 \sum S_2 $ & rewrites to & $ S_1 \sor S_2 $ \\ +2a & +$ (S_1 \sdor \{(r, A_1)\}) \dsum (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & +$ (S_1 \dsum S_2) \sor \{(r, A_1 \distr A_2)\} $ \\ +2b & $ S_1 \dsum S_2 $ & rewrites to & $ S_1 \sor S_2 $ +% +\end{tabular} \end{center} +\end{footnotesize} + +Rules 1a, 2a override 1b, 2b respectively and +$ A_1 \distr A_2 = \{G_1 \sum G_2 \st G_1 \in A_1, G_2 \in A_2\} $. + +\item +The semantics of \TT{property} operator is described below. + +In the following rule, +$s$ is ``$ \TT{property}\ h\ p_1\ \TT{main}\ p_2\ \RM{attr}\ e_1, \cdots, +e_m\ \TT{in}\ k\ x $'', $P$ is $ \Property\ h $ and +$A_2$ is $ \{ \Exp\ P\ p_1\ r_1\ \{e_1, \cdots, e_m\}\} $: + +\begin{footnotesize} +\begin{center} +% +\irule +{h \oft \GP{refine} \spc p_1, p_2 \oft \GP{path} \spc + e_1, \cdots, e_m \oft \GP{exp} \spc k \in \TT{["pattern"]?} \spc + (\G, x) \daq S +}{A} +{(\G, s) \daq \bigsum \{ \{(r_2, A_2)\} \st +(\lex r_1 \in \Src\ k\ P\ (\Fsts\ S))\ +(r_1, p_1 \app p_2, r_2) \in P +\}} +% +\end{center} +\end{footnotesize} + +When the \TT{main} clause is not present, we assume $ p_2 = \TT{/} $. + +Here $ \Property\ h $ gives the appropriate access relation according to +the $ h $ flag (this is the primitive function that inspects the {\RDF} graph, +see \subsecref{HighAccess}). + +$ \Src\ k\ P\ V $ is a helper function giving the source set +according to the $ k $ flag. $ \Src $ is based on $ \Match $, the helper +function handling POSIX regular expressions. Formally: + +\begin{footnotesize} +\begin{center} \begin{tabular}{rll} +% +$ \Src\ \TT{""}\ P\ V $ & rewrites to & $ V $ \\ +$ \Src\ \TT{"pattern"}\ P\ V $ & rewrites to & +$ \Match\ \{r_1 \st (\lex p, r2)\ (r_1, p, r_2) \in P\} $\ V \\ +$ \Match\ W\ V $ & rewrites to & $ \bigsor \{\Pattern\ W\ s \st s \in V\} $ +% +\end{tabular} \end{center} +\end{footnotesize} + +Here $ \Pattern\ W\ s $ is the primitive function returning the subset of +$ W \oft \Setof\ \Str $ whose element match the POSIX 1003.2-1992% +\footnote{Included in POSIX 1003.1-2001: +\CURI{http://www.unix-systems.org/version3/ieee\_\,std.html}.} +regular expression $ \verb+"^"+ \app s \app \TT{"\$"} $. + +$ \Exp\ P\ \p_1\ r_1\ E $ is the helper function that builds the group of +attributes specified in the \TT{attr} clause. +$ \Exp $ is based on $ \Exp\p $ which handles a single attribute. Formally: + +\begin{footnotesize} +\begin{center} \begin{tabular}{rlll} +% +$ f\ P\ r_1\ p_1\ p $ & rewrites to & +$ \{ r_2 \st (r_1, p_1 \app (\Name\ p), r_2) \in P \} $ & +with $ p \oft \GP{path} $ \\ +$ \Exp\p\ P\ r_1\ p_1\ p $ & rewrites to & +$ \{ (\Name\ p, f\ P\ r_1\ p_1\ p) \} $ & +with $ p \oft \GP{path} $ \\ +$ \Exp\p\ P\ r_1\ p_1\ (p\ \TT{as}\ p\p) $ & rewrites to & +$ \{ (\Name\ p\p, f\ P\ r_1\ p_1\ p) \} $ & +with $ p, p\p \oft \GP{path} $ \\ +$ \Exp\ P\ r_1\ p_1\ E $ & rewrites to & +$ \bigsum \{ \Exp\p\ P\ r_1\ p_1\ e \st e \in E \} $ & +with $ E \oft \Setof\ \GP{exp} $ +\end{tabular} \end{center} +\end{footnotesize} + +When $ c_1, \cdots, c_n \oft \GP{cons} $ and the clause +``\TT{istrue} $ c_1, \cdots, c_n $'' is present, the set $ P $ must be replaced +with $ \{ (r_1, p, r_2) \in P \st \Istrue\ P\ r_1\ p_1\ C \} $ +where $ C $ is $ \{c_1, \cdots, c_n\} $ and $ \Istrue $ is a helper function +that checks the constraints in $ C $. +$ \Istrue $ is based on $ \Istrue\p $ that handles a single constraint. +Formally, if $ p \oft \GP{path} $ and $ (\G, x) \daq S $: + +\begin{footnotesize} +\begin{center} \begin{tabular}{rll} +% +$ g\ P\ p_1\ p $ & rewrites to & +$ \{ r_2 \st (\lex r_1)\ (r_1, p_1 \app (\Name\ p), r_2) \in P \} $ \\ +$ \Istrue\p\ P\ r_1\ p_1\ (p\ \TT{in}\ x) $ & rewrites to & +$ (f\ P\ r_1\ p_1\ p) \meet \Fsts\ S $ \\ +$ \Istrue\p\ P\ r_1\ p_1\ (p\ \TT{match}\ x) $ & rewrites to & +$ (f\ P\ r_1\ p_1\ p) \meet \Match\ (g\ P\ p_1\ p)\ (\Fsts\ S) $ \\ +$ \Istrue\ P\ r_1\ p_1\ C $ & rewrites to & +$ (\lall c \in W)\ \Istrue\p\ P\ r_1\ p_1\ c $ +% +\end{tabular} \end{center} +\end{footnotesize} + +For each clause ``\TT{isfalse} $ c_1, \cdots, c_n $'' the set $ P $ +must be replaced with +$ \{ (r_1, p, r_2) \in P \st \lnot (\Istrue\ P\ r_1\ p_1\ C) \} $ +(using the above notation). +Note that these substitutions and the former must be composed if necessary. + +If the \TT{inverse} flag is present, also replace the instances of $ P $ in +the rule $A$ and in the definition of $ \Src $ with +$ \{ (r_2, p, r_1) \st (r_1, p, r_2) \in P \} $. + +\end{itemize} + +The second group of \GP{query} expressions includes the context manipulation +facilities: + +\begin{itemize} + +\item +The operators for reading variables: + +\begin{footnotesize} \begin{center} +% +\irule{i \oft \GP{svar}}{}{(\g, i) \daq \get{\G_s}{i}} \spc +\irule{i \oft \GP{avar}}{}{(\g, i) \daq \{\get{\G_a}{i}\}} +% +\end{center} \end{footnotesize} + +$ \get{\G_s}{i} $ and $ \{\get{\G_a}{i}\} $ mean $ \ES $ if $ i $ is not defined. + +\item +The \TT{let} operator assigns an {\av} set variable (svar): + +\begin{footnotesize} +\begin{center} +% +\irule{i \oft \GP{svar} \spc (\G_1, x_1) \daq (\g, S_1) \spc + ((\set{\G_s}{i}{S_1}, \G_a, \G_g), x_2) \daq (\G_2, S_2)} +{}{(\G_1, \TT{let}\ i\ \TT{=}\ x_1\ \TT{in}\ x_2) \daq (\G_2, S_2)} +% +\end{center} +\end{footnotesize} + +The sequential composition operator \TT{;;} has the semantics of a \TT{let} +introducing a fresh variable, so ``$ x_1\ \TT{;;}\ x_2 $'' revrites +to ``$ \TT{let}\ i\ \TT{=}\ x_1\ \TT{in}\ x_2 $'' where $i$ does not occur in +$x_2$. + +\item +The \TT{ex} and ``dot'' operators provide a way to read the attributes stored +in avar's. + +The \TT{ex} (exists) operator gives access to the groups of attributes +associated to the {\av}'s in the $ \G_a $ part of the context and does +this by loading its $ \G_g $ part, which is used by the ``dot'' operator +described below. + +\TT{ex} is true if the query following it is successful for at least one +pool of attribute groups, one for each {\av} in the $ \G_a $ part of the +context. Formally we have the rules: + +\begin{footnotesize} +\begin{center} +% +\irule{(\lall \D_g \in \All\ \G_a)\ ((\G_s, \G_a, \G_g + \D_g), y) \daq \F} + {1}{(\G, \TT{ex}\ y) \daq \F} \spc +\irule{\Nop}{2}{(\G, \TT{ex}\ y) \daq \T} \spc +% +\end{center} +\begin{center} +% +\irule {i \oft \GP{avar} \spc p \oft \GP{path} \spc \get{\get{\G_g}{i}}{\Name\ p} = \{s_1, \cdots, s_m\}}{} + {(\G, i\TT{.}p) \daq \{(s_1, \ES), \cdots, (s_m, \ES)\}} +% +\end{center} +\end{footnotesize} + +where% +\footnote{$\D_g$ has the type of $ \G_g $.} +$ \All\ \G_a = \{\D_g \st \get{\D_g}{i} = G\ \RM{iff}\ G \in \Snd\ \get{\G_a}{i} \} $, +and $ \G = \g $. + +Moreover $ \get{\get{\G_g}{i}}{\Name\ p} $ means $ \ES $ +if $ i $ or $ \Name\ p $ are not defined where appropriate. + +Here the first rule has higher precedence than the second one does. + +\end{itemize} + +The third group of \GP{query} expressions includes the {\av} set manipulation +facilities: + +\begin{itemize} + +\item +The \TT{add} operator adds a given set of attribute groups to the {\av}'s +of an {\av} set using a union with or without attribute distribution +according to the \TT{distr} flag. + +\begin{footnotesize} +\begin{center} +% +\irule +{h \in \TT{["distr"]?} \spc a \in \GP{xgroups} \spc + (\G, \TT{[} ""\ \TT{attr}\ a \TT{]}) \daq \{("", A)\} \spc + (\G, x) \daq \{(r_1, A_1), \cdots, (r_m, A_m)\}}{} +{(\G, \TT{add}\ a\ \TT{in}\ x) \daq \{(r_1, A_1 \jolly A), \cdots, (r_m, A_m \jolly A)\}} +% +\end{center} +\begin{center} +% +\irule +{h \in \TT{["distr"]?} \spc i \in \GP{avar} \spc + (\g, x) \daq \{(r_1, A_1), \cdots, (r_m, A_m)\}}{} +{(\g, \TT{add}\ i\ \TT{in}\ x) \daq \{(r_1, A_1 \jolly \Snd\ \get{\G_a}{i}), \cdots, (r_m, A_m \jolly \Snd\ \get{\G_a}{i})\}} +% +\end{center} +\end{footnotesize} + +Where $ \jolly_{\tt""} = \sor $ and $ \jolly_{\tt"distr"} = \distr $. +Moreover $ \Snd\ \get{\G_a}{i} = \ES $ if $i$ is not defined. + +\item +The semantics of the \TT{for} operator is given in terms of the {\For} helper +function: + +\begin{footnotesize} +\begin{center} +% +\irule{i \oft \GP{avar} \spc (\G, x_1) \daq (\G_1, S_1) \spc h \in \TT{["sup"|"inf"]}} +{}{(\G, \TT{for}\ i\ \TT{in}\ x_1\ h\ x_2) \daq \For\ h\ \G_1\ i\ x_2\ S_1} \spc +\irule{i \oft \GP{avar} \spc x_2 \oft \GP{query}}{} + {\For\ h\ \G\ i\ x_2\ \ES\ \RM{rewrites to}\ (\G, \ES)} +% +\end{center} +\begin{center} +% +\irule{i \oft \GP{avar} \spc ((\G_s, \set{\G_a}{i}{R}, \G_g), x_2) \daq (\G_2, S_2)} + {}{\For\ h\ \G\ i\ x_2\ (S_1 \sdor \{R\})\ \RM{rewrites to}\ + (\G_2 ,(\Snd\ (\For\ h\ \G_2\ i\ x_2\ S_1)) \jolly_h S_2)} +% +\end{center} +\end{footnotesize} + +Here we have $ R \oft T_3 $, $ \G = \g $, $ \jolly_{\tt"sup"} = \sum $ and +$ \jolly_{\tt"inf"} = \prod $. + +$ \dprod $ and $ \prod $ are helper functions describing the two intersection +operations on {\av} sets: with and without attribute distribution respectively. +They are dual to $ \dsum $ and $ \sum $. $ \dprod $ does not appear in this +version of {\MathQL} but was used in the erlier versions +\cite{Lor02, GS03, Gui03}. + +\begin{footnotesize} +\begin{center} \begin{tabular}{lrll} +% +1a & +$ (S_1 \sdor \{(r, A_1)\}) \prod (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & +$ (S_1 \prod S_2) \sor \{(r, A_1 \sor A_2)\} $ \\ +1b & $ S_1 \prod S_2 $ & rewrites to & $ \ES $ \\ +2a & +$ (S_1 \sdor \{(r, A_1)\}) \dprod (S_2 \sdor \{(r, A_2)\}) $ & rewrites to & +$ (S_1 \dprod S_2) \sor \{(r, A_1 \distr A_2)\} $ \\ +2b & $ S_1 \dprod S_2 $ & rewrites to & $ \ES $ +% +\end{tabular} \end{center} +\end{footnotesize} + +As for $ \sum $ and $ \dsum $, rules 1a, 2a override rules 1b, 2b respectively. + +\item +The semantics of the \TT{while} operator is given by the rules below: + +\begin{footnotesize} +\begin{center} +% +\irule{h \in \TT{["sup"|"inf"]} \spc (\G, x_1) \daq (\G_1, \ES)}{1} +{(\G, \TT{while}\ x_1\ h\ x_2) \daq (\G_1, \ES)} +% +\end{center} +\begin{center} +% +\irule +{h \in \TT{["sup"|"inf"]} \spc (\G, x_1) \daq (\G_1, S_1) \spc + (\G_1, x_2) \daq (\G_2, S_2) \spc + (\G_2, \TT{while}\ x_1\ h\ x_2) \daq (\G_3, S)}{2} +{(\G, \TT{while}\ x_1\ h\ x_2) \daq (\G_3, S_2 \jolly_h S)} +% +\end{center} +\end{footnotesize} + +Again $ \jolly_{\tt"sup"} = \sum $ and $ \jolly_{\tt"inf"} = \prod $. +Moreover rule 1 takes precedence over rule 2. + +\end{itemize} + +The forth group of \GP{query} constructions make {\MathQL} an extensible +language. + +\begin{itemize} + +\item +The ``function'' construction invokes an external function returning an {\av} +set. The function is identified by a \GP{path} and its arguments are a set of +\GP{path}'s and a set of \GP{query}'s. It is a mistake to invoke a function +with the wrong number of \GP{path}'s and \GP{query}'s as input (each +particular function defines these numbers independently). + +\begin{footnotesize} +\begin{center} +% +\irule +{p, p_1, \cdots, p_m \in \GP{path} \spc x_1, \cdots, x_n \in \GP{query}}{} +{(\G, p\ \{p_1 \TT{,} \cdots \TT{,} p_m\}\ \{x_1 \TT{,} \cdots \TT{,} x_m\}) + \daq \Fun\ p\ [p_1, \cdots, p_m]\ [x_1, \cdots, x_n]\ \G} +% +\end{center} +\end{footnotesize} + +where $ \Fun \oft \GP{path} \times (\Listof\ \GP{path}) \times (\Listof\ +\GP{query}) \times K \to T_4 $ is the primitive function performing the +low level invocation. +The core language does not include any external function and it is a mistake +to invoke an undefined function. + +\item +The \TT{gen} construction invokes an external function returning a \GP{query} +The function is identified by a \GP{path} and its arguments are a set of +\GP{query}'s. It is a mistake to invoke a function with the wrong number of +\GP{query}'s as input (each particular function defines this number +independently). + +\begin{footnotesize} +\begin{center} +% +\irule +{p \in \GP{path} \spc x_1, \cdots, x_n \in \GP{query} \spc + (\G, \Gen\ p\ [x_1, \cdots, x_n]\ \G) \daq (\G\p, S)}{} +{(\G, \TT{gen}\ p\ \{x_1 \TT{,} \cdots \TT{,} x_m\}) \daq (\G\p, S)} +% +\end{center} +\end{footnotesize} + +where $ \Gen \oft \GP{path} \times (\Listof\ \GP{query}) \times K \to +\GP{query} $ is the primitive function performing the low level invocation. +The core language does not include any external function of this kind and it +is a mistake to invoke an undefined function. + +The construction ``\TT{gen} p \TT{in} x'' rewrites to ``\TT{gen} p \{x\}'' +for the user's convenience. + +\end{itemize} + +\subsubsection*{Results} + +An \GP{avs} expression ({\ie} the explicit representation of an {\av} set that +can denote a query result) is evaluated to an {\av} set according to the +following rules. + +\begin{footnotesize} +\begin{center} +% +\irule{x_1, \cdots, x_m \in \GP{av} \spc + x_1 \dar S_1 \spc \cdots \spc x_m \dar S_m}{} + {x_1 \TT{;} \cdots \TT{;} x_m \dar S_1 \sum \cdots \sum S_m} +% +\end{center} +\begin{center} +% +\irule{q \in \GP{string} \spc g_1, \cdots, g_m \in \GP{group} \spc + q\ \TT{attr}\ g_1 \dar S_1 \spc \cdots \spc + q\ \TT{attr}\ g_m \dar S_m}{} + {q\ \TT{attr}\ g_1 \TT{,} \cdots \TT{,} g_m \dar S_1 \sum \cdots \sum S_m} +% +\end{center} +\begin{center} +% +\irule{q \in \GP{string} \spc a_1, \cdots, a_m \in \GP{atr} \spc + q\ \TT{attr}\ \{ a_1 \} \dar S_1 \spc \cdots \spc + q\ \TT{attr}\ \{ a_m \} \dar S_m}{} + {(\G, q\ \TT{attr}\ \{ a_1 \TT{;} \cdots \TT{;} a_m \} \dar S_1 \dsum \cdots \dsum S_m} +% +\end{center} +\begin{center} +% +\irule{q, q_1, \cdots, q_m \in \GP{string} \spc p \in \GP{path}}{} + {q\ \TT{attr}\ \{ p = \{ q_1 \TT{,} \cdots \TT{,} q_m \} \} \dar + \{(\Unquote\ q, \{ \{ (\Name\ p, \{ \Unquote\ q_1, \cdots, \Unquote\ q_m \}) \} \})\}} +% +\end{center} +\end{footnotesize} diff --git a/helm/mathql/doc/mathql_operational_library.tex b/helm/mathql/doc/mathql_operational_library.tex new file mode 100644 index 000000000..d5c782872 --- /dev/null +++ b/helm/mathql/doc/mathql_operational_library.tex @@ -0,0 +1 @@ +\subsection {The basic library} diff --git a/helm/mathql/doc/mathql_overview.tex b/helm/mathql/doc/mathql_overview.tex index 761b688cb..6d59b55fa 100644 --- a/helm/mathql/doc/mathql_overview.tex +++ b/helm/mathql/doc/mathql_overview.tex @@ -1,10 +1,10 @@ \section{Overview} {\MathQL}% -\footnote{See \URI{http://helm.cs.unibo.it/mathql}.} +\footnote{See \CURI{http://helm.cs.unibo.it/mathql}.} is a query language for {\RDF} \cite{RDF,RDFS} databases, developed in the context of the {\HELM}% -\footnote{See \URI{http://helm.cs.unibo.it}.} +\footnote{See \CURI{http://helm.cs.unibo.it}.} project \cite{APSCGS03}. Its name suggests that it is supposed to be the first of a group of query languages for retrieving information from distributed digital libraries of @@ -86,7 +86,7 @@ for its query results. \subsubsection*{Post-processing and code generation capabilities} The {\MathQL} query engine, that is written in {\CAML}% -\footnote{See \URI{http://caml.inria.fr}.} +\footnote{See \CURI{http://caml.inria.fr}.} for an easy integration with the {\HELM} software, provides two ways of processing the query results: at {\CAML} side and natively. @@ -133,10 +133,10 @@ However the engine does make few assumptions on the way metadata are physically organized and needs some user-provided knowledge about the concrete metadata representation. Metadata stored as {\RDF} triples are accessed through a {\MySQL}% -\footnote{See \URI{http://www.mysql.com}.} +\footnote{See \CURI{http://www.mysql.com}.} or a {\PostgreSQL}% -\footnote{See \URI{http://www.postgresql.org}.} +\footnote{See \CURI{http://www.postgresql.org}.} engine, while metadata stored as {\RDF}/{\XML} files are accessed through a {\Galax}% -\footnote{See \URI{http://db.bell-labs.com/galax/}.} +\footnote{See \CURI{http://db.bell-labs.com/galax/}.} {\XQuery} \cite{XQuery} engine. -- 2.39.2