ocaml 3.09 transition

[helm.git] / helm / papers / matita / matita.tex
diff --git a/helm/papers/matita/matita.tex b/helm/papers/matita/matita.tex

index 506d43167596abe2df3fce44b305ccca2ee7412c..d9a7a525f572972bb90800a547c1d25635a2a146 100644 (file)
--- a/helm/papers/matita/matita.tex
+++ b/helm/papers/matita/matita.tex
@@ -1,12 +1,12 @@
-\documentclass[a4paper]{llncs}
-\pagestyle{headings}
+\documentclass{kluwer}
  \usepackage{color}
  \usepackage{graphicx}
-\usepackage{amssymb,amsmath}
+% \usepackage{amssymb,amsmath}
  \usepackage{hyperref}
-\usepackage{picins}
+% \usepackage{picins}
  \usepackage{color}
  \usepackage{fancyvrb}
+\usepackage[show]{ed}
  
  \definecolor{gray}{gray}{0.85}
  %\newcommand{\logo}[3]{
@@ -18,10 +18,10 @@
  \newcommand{\ELIM}{\textsc{Elim}}
  \newcommand{\HELM}{Helm}
  \newcommand{\HINT}{\textsc{Hint}}
-\newcommand{\IN}{\ensuremath{\mathbb{N}}}
+\newcommand{\IN}{\ensuremath{\dN}}
  \newcommand{\INSTANCE}{\textsc{Instance}}
-\newcommand{\IR}{\ensuremath{\mathbb{R}}}
-\newcommand{\IZ}{\ensuremath{\mathbb{Z}}}
+\newcommand{\IR}{\ensuremath{\dR}}
+\newcommand{\IZ}{\ensuremath{\dZ}}
  \newcommand{\LIBXSLT}{LibXSLT}
  \newcommand{\LOCATE}{\textsc{Locate}}
  \newcommand{\MATCH}{\textsc{Match}}
@@ -37,6 +37,16 @@
  \newcommand{\TEXMACRO}[1]{\texttt{\char92 #1}}
  \newcommand{\UWOBO}{UWOBO}
  \newcommand{\WHELP}{Whelp}
+\newcommand{\DOT}{\ensuremath{\mbox{\textbf{.}}}}
+\newcommand{\SEMICOLON}{\ensuremath{\mbox{\textbf{;}}}}
+\newcommand{\BRANCH}{\ensuremath{\mbox{\textbf{[}}}}
+\newcommand{\SHIFT}{\ensuremath{\mbox{\textbf{\textbar}}}}
+\newcommand{\POS}[1]{\ensuremath{#1\mbox{\textbf{:}}}}
+\newcommand{\MERGE}{\ensuremath{\mbox{\textbf{]}}}}
+\newcommand{\FOCUS}[1]{\ensuremath{\mathtt{focus}~#1}}
+\newcommand{\UNFOCUS}{\ensuremath{\mathtt{unfocus}}}
+\newcommand{\SKIP}{\MATHTT{skip}}
+\newcommand{\TACTIC}[1]{\ensuremath{\mathtt{tactic}~#1}}
  
  \definecolor{gray}{gray}{0.85} % 1 -> white; 0 -> black
  \newcommand{\NT}[1]{\langle\mathit{#1}\rangle}
@@ -50,9 +60,13 @@
     \fcolorbox{black}{gray}{\BUseVerbatim[boxwidth=0.9\linewidth]{boxtmp}}
    \end{center}}
  
+\newcounter{example}
+\newenvironment{example}{\stepcounter{example}\vspace{0.5em}\noindent\emph{Example} \arabic{example}.}
+ {}
  \newcommand{\ASSIGNEDTO}[1]{\textbf{Assigned to:} #1}
  \newcommand{\FILE}[1]{\texttt{#1}}
-\newcommand{\NOTE}[1]{\marginpar{\scriptsize #1}}
+% \newcommand{\NOTE}[1]{\ifodd \arabic{page} \else \hspace{-2cm}\fi\ednote{#1}}
+\newcommand{\NOTE}[1]{\ednote{#1}{foo}}
  \newcommand{\TODO}[1]{\textbf{TODO: #1}}
  
  \newsavebox{\tmpxyz}
@@ -60,27 +74,46 @@
    \savebox{\tmpxyz}[0.9\linewidth]{
      \begin{minipage}{0.9\linewidth}
        \ensuremath{#1} \\
-      --------------------------\\ % come si fa una linea orizzontale?
+      \rule{3cm}{0.03cm}\\
        \ensuremath{#2}
      \end{minipage}}\setlength{\fboxsep}{3mm}%
    \begin{center}
     \fcolorbox{black}{gray}{\usebox{\tmpxyz}}
    \end{center}}
  
-\title{The Matita proof assistant}
-\author{Andrea Asperti, Claudio Sacerdoti Coen, Enrico Tassi
- and Stefano Zacchiroli}
-\institute{Department of Computer Science, University of Bologna\\
- Mura Anteo Zamboni, 7 --- 40127 Bologna, ITALY\\
- \email{$\{$asperti,sacerdot,tassi,zacchiro$\}$@cs.unibo.it}}
  \bibliographystyle{plain}
  
  \begin{document}
-\maketitle
+
+\begin{opening}
+
+ \title{The \MATITA{} Proof Assistant}
+
+\author{Andrea \surname{Asperti} \email{asperti@cs.unibo.it}}
+\author{Claudio \surname{Sacerdoti Coen} \email{sacerdot@cs.unibo.it}}
+\author{Enrico \surname{Tassi} \email{tassi@cs.unibo.it}}
+\author{Stefano \surname{Zacchiroli} \email{zacchiro@cs.unibo.it}}
+\institute{Department of Computer Science, University of Bologna\\
+ Mura Anteo Zamboni, 7 --- 40127 Bologna, ITALY}
+
+\runningtitle{The Matita proof assistant}
+\runningauthor{Asperti, Sacerdoti Coen, Tassi, Zacchiroli}
+
+% \date{data}
+
+\begin{motto}
+``We are nearly bug-free'' -- \emph{CSC, Oct 2005}
+\end{motto}
  
  \begin{abstract}
+ abstract qui
  \end{abstract}
  
+\keywords{Proof Assistant, Mathematical Knowledge Management, XML, Authoring,
+Digital Libraries}
+
+\end{opening}
+
  \section{Introduction}
  \label{sec:intro}
  {\em Matita} is the proof assistant under development by the \HELM{} team
@@ -187,7 +220,7 @@ language;
  rendering, and semantic selection, i.e. the possibility to select semantically
  meaningful rendering expressions, and to past the respective content into
  a different text area.
-\NOTE{il widget\\ non ha sel\\ semantica}
+\NOTE{il widget non ha sel semantica}
  \end{itemize}
  Starting from all this, the further step of developing our own 
  proof assistant was too
@@ -209,7 +242,7 @@ look like if entirely rewritten from scratch: just to give an
  idea, although \MATITA{} currently supports almost all functionalities of
  \COQ{}, it links 60'000 lins of \OCAML{} code, against ... of \COQ{} (and
  we are convinced that, starting from scratch again, we could furtherly
-reduce our code in sensible way).\NOTE{righe\\\COQ{}}
+reduce our code in sensible way).\NOTE{righe \COQ{}}
  
  \begin{itemize}
   \item scelta del sistema fondazionale
@@ -221,14 +254,510 @@ reduce our code in sensible way).\NOTE{righe\\\COQ{}}
    \end{itemize}
  \end{itemize}
  
-\section{Features}
+\section{\HELM{} library(??)}
+
+\subsection{libreria tutta visibile}
+\ASSIGNEDTO{csc}
+\NOTE{assumo che si sia gia' parlato di approccio content-centrico}
+Our commitment to the content-centric view of the architecture of the system
+has important consequences on the user's experience and on the functionalities
+of several components of \MATITA. In the content-centric view the library
+of mathematical knowledge is an already existent and completely autonomous
+entity that we are allowed to exploit and augment using \MATITA. Thus, in
+principle, when the user starts to prove a new theorem she has complete
+visibility of the library and she can refer to every definition and lemma,
+also using the mathematical notation already developed. In a similar way,
+every form of automation of the system must be able to analyze and possibly
+exploit every notion in the library.
+
+The benefits of this approach highly justify the non neglectable price to pay
+in the development of several components. We analyse now a few of the causes
+of this additional complexity.
+
+\subsubsection{Ambiguity}
+A rich mathematical library includes equivalent definitions and representations
+of the same notion. Moreover, mathematical notation inside a rich library is
+surely highly overloaded. As a consequence every mathematical expression the
+user provides is highly ambiguous since all the definitions,
+representations and special notations are available at once to the user.
+
+The usual solution to the problem, as adopted for instance in Coq, is to
+restrict the user's scope to just one interpretation for each definition,
+representation or notation. In this way much of the ambiguity is removed,
+burdening the user that must someway declare what is in scope and that must
+use special syntax when she needs to refer to something not in scope.
+
+Even with this approach ambiguity cannot be completely removed since implicit
+coercions can be arbitrarily inserted by the system to ``change the type''
+of subterms that do not have the expected type. Usually implicit coercions
+are used to overcome the absence of subtyping that should mimic the subset
+relation found in set theory. For instance, the expression
+$\forall n \in nat. 2 * n * \pi \equiv_\pi 0$ is correct in set theory since
+the set of natural numbers is a subset of that of real numbers; the
+corresponding expression $\forall n:nat. 2*n*\pi \equiv_\pi 0$ is not well typed
+and requires the automatic insertion of the coercion $real_of_nat: nat \to R$
+either around both 2 and $n$ (to make both products be on real numbers) or
+around the product $2*n$. The usual approach consists in either rejecting the
+ambiguous term or arbitrarily choosing one of the interpretations. For instance,
+Coq rejects the declaration of coercions that have alternatives
+(i.e. already declared coercions with the same domain and codomain)
+or that are obtained composing other coercions in order to
+avoid making several terms highly ambiguous by choosing to insert any one of the
+alternative coercions. Coq also arbitrarily chooses how to insert coercions in
+terms to make them well typed when there is more than one possibility (as in
+the previous example).
+
+The approach we are following is radically different. It consists in dealing
+with ambiguous expressions instead of avoiding them. As a last resource,
+when the system is unable to disambiguate the input, the user is interactively
+required to provide more information that is recorded to avoid asking the
+same question again in subsequent processing of the same input.
+More details on our approach can be found in \ref{sec:disambiguation}.
+
+\subsubsection{Consistency}
+A large mathematical library is likely to be logically inconsistent.
+It may contain incompatible axioms or alternative conjectures and it may
+even use definitions in incompatible ways. To clarify this last point,
+consider two identical definitions of a set of elements of a given type
+(or of a category of objects of a given type). Let us call the two definitions
+$A-Set$ and $B-Set$ (or $A-Category$ and $B-Category$).
+It is perfectly legitimate to either form the $A-Set$ of every $B-Set$
+or the $B-Set$ of every $A-Set$ (the same for categories). This just corresponds
+to assuming that a $B-Set$ (respectively an $A-Set$) is a small set, whereas
+an $A-Set$ (respectively a $B-Set$) is a big set (possibly of small sets).
+However, if one part of the library assumes $A-Set$s to be the small ones
+and another part of the library assumes $B-Set$s to be the small ones, the
+library as a whole will be logically inconsistent.
+
+Logical inconsistency has never been a problem in the daily work of a
+mathematician. The mathematician simply imposes himself a discipline to
+restrict himself to consistent subsets of the mathematical knowledge.
+However, in doing so he does not choose the subset in advance by forgetting
+the rest of his knowledge. On the contrary he may proceed with a sort of
+top-down strategy: he may always inspect or use part of his knowledge, but
+when he actually does so he should check recursively that inconsistencies are
+not exploited.
+
+Contrarily to the mathematical practice, the usual tendency in the world of
+assisted automation is that of building a logical environment (a consistent
+subset of the library) in a bottom up way, checking the consistency of a
+new axiom or theorem as soon as it is added to the environment. No lemma
+or definition outside the environment can be used until it is added to the
+library after every notion it depends on. Moreover, very often the logical
+environment is the only part of the library that can be inspected,
+that we can search lemmas in and that can be exploited by automatic tactics.
+
+Moving one by one notions from the library to the environment is a costly
+operation since it involves re-checking the correctness of the notion.
+As a consequence mathematical notions are packages into theories that must
+be added to the environment as a whole. However, the consistency problem is
+only raised at the level of theories: theories must be imported in a bottom
+up way and the system must check that no inconsistency arises.
+
+The practice of limiting the scope on the library to the logical environment
+is contrary to our commitment of being able to fully exploit as much as possible
+of the library at any given time. To reconcile consistency and visibility
+we have departed from the traditional implementation of an environment
+allowing environments to be built on demand in a top-down way. The new
+implementation is based on a modified meta-theory that changes the way
+convertibility, type checking, unification and refinement judgements.
+The modified meta-theory is fully described in \cite{libraryenvironments}.
+Here we just remark how a strong commitment on the way the user interacts
+with the library has lead to modifications to the logical core of the proof
+assistant. This is evidence that breakthroughs in the user interfaces
+and in the way the user interacts with the tools and with the library could
+be achieved only by means of strong architectural changes.
+
+\subsubsection{Accessibility}
+A large library that is completely in scope needs effective indexing and
+searching methods to make the user productive. Libraries of formal results
+are particularly critical since they hold a large percentage of technical
+lemmas that do not have a significative name and that must be retrieved
+using advanced methods based on matching, unification, generalization and
+instantiation.
+
+The efficiency of searching inside the library becomes a critical operation
+when automatic tactics exploit the library during the proof search. In this
+scenario the tactics must retrieve a set of candidates for backward or
+forward reasoning in a few milliseconds.
+
+The choice of several proof assistants is to use ad-hoc data structures,
+such as context trees, to index all the terms currently in scope. These
+data structures are expecially designed to quickly retrieve terms up
+to matching, instantiation and generalization. All these data structures
+try to maximize sharing of identical subterms so that matching can be
+reduced to a visit of the tree (or dag) that holds all the maximally shared
+terms together.
+
+Since the terms to be retrieved (or at least their initial prefix)
+are stored (actually ``melted'') in the data structure, these data structures
+must collect all the terms in a single location. In other words, adopting
+such data structures means centralizing the library.
+
+In the \MOWGLI{} project we have tried to follow an alternative approach
+that consists in keeping the library fully distributed and indexing it
+by means of spiders that collect metadata and store them in a database.
+The challenge is to be able to collect only a smaller as possible number
+of metadata that provide enough information to approximate the matching
+operation. A matching operation is then performed in two steps. The first
+step is a query to the remote search engine that stores the metadata in
+order to detect a (hopefully small) complete set of candidates that could
+match. Completeness means that no term that matches should be excluded from
+the set of candiates. The second step consists in retrieving from the
+distributed library all the candidates and attempt the actual matching.
+
+In the last we years we have progressively improved this technique.
+Our achievements can be found in \cite{query1,query2,query3}.
+
+The technique and tools already developed have been integrated in \MATITA{},
+that is able to contact a remote \WHELP{} search engine \cite{whelp} or that
+can be directly linked to the code of the \WHELP. In either case the database
+used to store the metadata can be local or remote.
+
+Our current challenge consists in the exploitation of \WHELP{} inside of
+\MATITA. In particular we are developing a set of tactics, for instance
+based on paramodulation \cite{paramodulation}, that perform queries to \WHELP{}
+to restrict the scope on the library to a set of interesting candidates,
+greatly reducing the search space. Moreover, queries to \WHELP{} are performed
+during parsing of user provided terms to disambiguate them.
+
+In Sect.~\ref{sec:metadata} we describe the technique adopted in \MATITA.
+
+\subsubsection{Library management}
+
+
+\subsection{ricerca e indicizzazione}
+\label{sec:metadata}
+\ASSIGNEDTO{andrea}
+
+\subsection{auto}
+\ASSIGNEDTO{andrea}
+
+\subsection{sostituzioni esplicite vs moduli}
+\ASSIGNEDTO{csc}
+
+\subsection{xml / gestione della libreria}
+\ASSIGNEDTO{gares}
+
+
+\section{User Interface (da cambiare)}
+
+\subsection{assenza di proof tree / resa in linguaggio naturale}
+\ASSIGNEDTO{andrea}
+
+\subsection{Disambiguation}
+\label{sec:disambiguation}
+\ASSIGNEDTO{zack}
+
+ \begin{table}
+  \caption{\label{tab:termsyn} Concrete syntax of CIC terms: built-in
+  notation\strut}
+ \hrule
+ \[
+ \begin{array}{@{}rcll@{}}
+   \NT{term} & ::= & & \mbox{\bf terms} \\
+     &     & x & \mbox{(identifier)} \\
+     &  |  & n & \mbox{(number)} \\
+     &  |  & s & \mbox{(symbol)} \\
+     &  |  & \mathrm{URI} & \mbox{(URI)} \\
+     &  |  & \verb+_+ & \mbox{(implicit)}\TODO{sync} \\
+     &  |  & \verb+?+n~[\verb+[+~\{\NT{subst}\}~\verb+]+] & \mbox{(meta)} \\
+     &  |  & \verb+let+~\NT{ptname}~\verb+\def+~\NT{term}~\verb+in+~\NT{term} \\
+     &  |  & \verb+let+~\NT{kind}~\NT{defs}~\verb+in+~\NT{term} \\
+     &  |  & \NT{binder}~\{\NT{ptnames}\}^{+}~\verb+.+~\NT{term} \\
+     &  |  & \NT{term}~\NT{term} & \mbox{(application)} \\
+     &  |  & \verb+Prop+ \mid \verb+Set+ \mid \verb+Type+ \mid \verb+CProp+ & \mbox{(sort)} \\
+     &  |  & \verb+match+~\NT{term}~ & \mbox{(pattern matching)} \\
+     &     & ~ ~ [\verb+[+~\verb+in+~x~\verb+]+]
+             ~ [\verb+[+~\verb+return+~\NT{term}~\verb+]+] \\
+     &     & ~ ~ \verb+with [+~[\NT{rule}~\{\verb+|+~\NT{rule}\}]~\verb+]+ & \\
+     &  |  & \verb+(+~\NT{term}~\verb+:+~\NT{term}~\verb+)+ & \mbox{(cast)} \\
+     &  |  & \verb+(+~\NT{term}~\verb+)+ \\
+   \NT{defs}  & ::= & & \mbox{\bf mutual definitions} \\
+     &     & \NT{fun}~\{\verb+and+~\NT{fun}\} \\
+   \NT{fun} & ::= & & \mbox{\bf functions} \\
+     &     & \NT{arg}~\{\NT{ptnames}\}^{+}~[\verb+on+~x]~\verb+\def+~\NT{term} \\
+   \NT{binder} & ::= & & \mbox{\bf binders} \\
+     &     & \verb+\forall+ \mid \verb+\lambda+ \\
+   \NT{arg} & ::= & & \mbox{\bf single argument} \\
+     &     & \verb+_+ \mid x \\
+   \NT{ptname} & ::= & & \mbox{\bf possibly typed name} \\
+     &     & \NT{arg} \\
+     &  |  & \verb+(+~\NT{arg}~\verb+:+~\NT{term}~\verb+)+ \\
+   \NT{ptnames} & ::= & & \mbox{\bf bound variables} \\
+     &     & \NT{arg} \\
+     &  |  & \verb+(+~\NT{arg}~\{\verb+,+~\NT{arg}\}~[\verb+:+~\NT{term}]~\verb+)+ \\
+   \NT{kind} & ::= & & \mbox{\bf induction kind} \\
+     &     & \verb+rec+ \mid \verb+corec+ \\
+   \NT{rule} & ::= & & \mbox{\bf rules} \\
+     &     & x~\{\NT{ptname}\}~\verb+\Rightarrow+~\NT{term}
+ \end{array}
+ \]
+ \hrule
+ \end{table}
+ 
+
+\subsubsection{Term input}
+
+The primary form of user interaction employed by \MATITA{} is textual script
+editing: the user modifies it and evaluate step by step its composing
+\emph{statements}. Examples of statements are inductive type definitions,
+theorem declarations, LCF-style tacticals, and macros (e.g. \texttt{Check} can
+be used to ask the system to refine a given term and pretty print the result).
+Since many statements refer to terms of the underlying calculus, \MATITA{} needs
+a concrete syntax able to encode terms of the Calculus of Inductive
+Constructions.
+
+Two of the requirements in the design of such a syntax are apparently in
+contrast:
+\begin{enumerate}
+ \item the syntax should be as close as possible to common mathematical practice
+  and implement widespread mathematical notations;
+ \item each term described by the syntax should be non-ambiguous meaning that it
+  should exists a function which associates to it a CIC term.
+\end{enumerate}
+
+These two requirements are addressed in \MATITA{} by the mean of two mechanisms
+which work together: \emph{term disambiguation} and \emph{extensible notation}.
+Their interaction is visible in the architecture of the \MATITA{} input phase,
+depicted in Fig.~\ref{fig:inputphase}. The architecture is articulated as a
+pipline of three levels: the concrete syntax level (level 0) is the one the user
+has to deal with when inserting CIC terms; the abstract syntax level (level 2)
+is an internal representation which intuitively encodes mathematical formulae at
+the content level~\cite{adams}\cite{mkm-structure}; the last level is that of
+CIC terms.
+
+\begin{figure}[ht]
+ \begin{center}
+  \includegraphics[width=0.9\textwidth]{input_phase}
+  \caption{\MATITA{} input phase}
+ \end{center}
+ \label{fig:inputphase}
+\end{figure}
+
+Requirement (1) is addressed by a built-in concrete syntax for terms, described
+in Tab.~\ref{tab:termsyn}, and the extensible notation mechanisms which offers a
+way for extending available mathematical notations. Extensible notation, which
+is also in charge of providing a parsing function mapping concrete syntax terms
+to content level terms, is described in Sect.~\ref{sec:notation}.  Requirement
+(2) is addressed by the conjunct action of that parsing function and
+disambiguation which provides a function from content level terms to CIC terms.
+
+\subsubsection{Sources of ambiguity}
+
+The translation from content level terms to CIC terms is not straightforward
+because some nodes of the content encoding admit more that one CIC encoding,
+invalidating requirement (2).
+
+\begin{example}
+ \label{ex:disambiguation}
+
+ Consider the term at the concrete syntax level \texttt{\TEXMACRO{forall} x. x +
+ ln 1 = x} of Fig.~\ref{fig:inputphase}(a), it can be the type of a lemma the
+ user may want to prove. Assuming that both \texttt{+} and \texttt{=} are parsed
+ as infix operators, all the following questions are legitimate and must be
+ answered before obtaining a CIC term from its content level encoding
+ (Fig.~\ref{fig:inputphase}(b)):
+
+ \begin{enumerate}
+
+  \item Since \texttt{ln} is an unbound identifier, which CIC constants does it
+   represent? Many different theorems in the library may share its (rather
+   short) name \dots
+
+  \item Which kind of number (\IN, \IR, \dots) the \texttt{1} literal stand for?
+   Which encoding is used in CIC to represent it? E.g., assuming $1\in\IN$, is
+   it an unary or a binary encoding?
+
+  \item Which kind of equality the ``='' node represents? Is it Leibniz's
+   polymorhpic equality? Is it a decidable equality over \IN, \IR, \dots?
+
+ \end{enumerate}
+
+\end{example}
+
+In \MATITA, three \emph{sources of ambiguity} are admitted for content level
+terms: unbound identifiers, literal numbers, and operators. Each instance of
+ambiguity sources (ambiguous entity) occuring in a content level term is
+associated to a \emph{disambiguation domain}. Intuitively a disambiguation
+domain is a set of CIC terms which may be replaced for an ambiguous entity
+during disambiguation. Each item of the domain is said to be an
+\emph{interpretation} for the ambiguous entity.
+
+\emph{Unbound identifiers} (question 1) are ambiguous entities since the
+namespace of CIC objects is not flat and the same identifier may denote many
+ofthem. For example the short name \texttt{plus\_assoc} in the \HELM{} library
+is shared by three different theorems stating the associative property of
+different additions.  This kind of ambiguity is avoidable if the user is willing
+to use long names (in form of URIs in the \texttt{cic://} scheme) in the
+concrete syntax, with the obvious drawbacks of obtaining long and unreadable
+terms.
+
+Given an unbound identifier, the corresponding disambiguation domain is computed
+querying the library for all constants, inductive types, and inductive type
+constructors having it as their short name (see the \LOCATE{} query in
+Sect.~\ref{sec:metadata}).
+
+\emph{Literal numbers} (question 2) are ambiguous entities as well since
+different kinds of numbers can be encoded in CIC (\IN, \IR, \IZ, \dots) using
+different encodings. Considering the restricted example of natural numbers we
+can for instance encode them in CIC using inductive datatypes with a number of
+constructor equal to the encoding base plus 1, obtaining one encoding for each
+base.
+
+For each possible way of mapping a literal number to a CIC term, \MATITA{} is
+aware of a \emph{number intepretation function} which, when applied to the
+natural number denoted by the literal\footnote{at the moment only literal
+natural number are supported in the concrete syntax} returns a corresponding CIC
+term. The disambiguation domain for a given literal number is built applying to
+the literal all available number interpretation functions in turn.
+
+Number interpretation functions can be defined in OCaml or directly using
+\TODO{notazione per i numeri}.
+
+\emph{Operators} (question 3) are intuitively head of applications, as such they
+are always applied to a (possiblt empty) sequence of arguments. Their ambiguity
+is a need since it is often the case that some notation is used in an overloaded
+fashion to hide the use of different CIC constants which encodes similar
+concepts. For example, in the standard library of \MATITA{} the infix \texttt{+}
+notation is available building a binary \texttt{Op(+)} node, whose
+disambiguation domain may refer to different constants like the addition over
+natural numbers \URI{cic:/matita/nat/plus/plus.con} or that over real numbers of
+the \COQ{} standard library \URI{cic:/Coq/Reals/Rdefinitions/Rplus.con}.
+
+For each possible way of mapping an operator application to a CIC term,
+\MATITA{} knows an \emph{operator interpretation function} which, when applied
+to an operator and its arguments, returns a CIC term. The disambiguation domain
+for a given operator is built applying to the operator and its arguments all
+available operator interpretation functions in turn.
+
+Operator interpretation functions could be added using the
+\texttt{interpretation} statement. For example, among the first line of the
+script \FILE{matita/library/logic/equality.ma} from the \MATITA{} standard
+library we read:
+
+\begin{grafite}
+interpretation "leibnitz's equality"
+ 'eq x y =
+   (cic:/matita/logic/equality/eq.ind#xpointer(1/1) _ x y).
+\end{grafite}
+
+Evaluating it in \MATITA{} will add an operator interpretation function for the
+binary operator \texttt{eq} which expands to the CIC term on the right hand side
+of the statement. That CIC term can be written using only built-in concrete
+syntax, can contain no ambiguity source; still, it can refer to operator
+arguments bound on the left hand side and can contain implicit terms (denoted
+with \texttt{\_}) which will be expanded to fresh metavariables. The latter
+feature is used in the example above for the first argument of Leibniz's
+polymorhpic equality.
+
+\subsubsection{Disambiguation algorithm}
+
+A \emph{disambiguation algorithm} takes as input a content level term and return
+a fully determined CIC term. The key observation on which a disambiguation
+algorithm is based is that given a content level term with more than one sources
+of ambiguity, not all possible combination of interpretation lead to a typable
+CIC term. In the term of Ex.~\ref{ex:disambiguation} for instance the
+interpretation of \texttt{ln} as a function from \IR to \IR and the
+interpretation of \texttt{1} as the Peano number $1$ can't coexists. The notion
+of ``can't coexists'' in the disambiguation of \MATITA{} is defined on top of
+the \emph{refiner} for CIC terms described in~\cite{csc-phd}.
+
+Briefly, a refiner is a function whose input is an \emph{incomplete CIC term}
+$t_1$ --- i.e. a term where metavariables occur (Sect.~\ref{sec:metavariables}
+--- and whose output is either:\NOTE{descrizione sommaria del refiner, pu\'o
+essere spostata altrove}
+
+\begin{enumerate}
+ 
+ \item an incomplete CIC term $t_2$ where $t_2$ is a well-typed term obtained
+  assigning a type to each metavariable in $t_1$ (in case of dependent types,
+  instantiation of some of the metavariable occurring in $t_1$ may occur as
+  well);
+
+ \item $\epsilon$, meaning that no well-typed term could be obtained via
+  assignment of type to metavariable in $t_1$ and their instantiation;
+
+ \item $\bot$, meaning that the refiner is unable to decide whether of the two
+  cases above apply (refinement is semi-decidable).
+
+\end{enumerate}
+
+On top of a CIC refiner \MATITA{} implement an efficient disambiguation
+algorithm, which is outlined below. It takes as input a content level term $c$
+and proceeds as follows:
+
+\begin{enumerate}
+
+ \item Create disambiguation domains $\{D_i | i\in\mathit{Dom}(c)\}$, where
+  $\mathit{Dom}(c)$ is the set of ambiguity sources of $c$. Each $D_i$ is a set
+  of CIC terms and can be built as described above.
+
+ \item An \emph{interpretation} $\Phi$ for $c$ is a map associating an
+  incomplete CIC term to each ambiguity source of $c$. Given $c$ and one of its
+  interpretations an incomplete CIC term is fully determined replacing each
+  ambiguity source of $c$ with its mapping in the interpretation and injecting
+  the remaining structure of the content level in the CIC level (e.g. replacing
+  the application of the content level with the application of the CIC level).
+  This operation is informally called ``interpreting $c$ with $\Phi$''.
+  
+  Create an initial interpretation $\Phi_0 = \{\phi_i | \phi_i = \_,
+  i\in\mathit{Dom}(c)\}$, which associates a fresh metavariable to each source
+  of ambiguity of $c$. During this step, implicit terms are expanded to fresh
+  metavariables as well.
+
+ \item Refine the current incomplete CIC term (i.e.  the term obtained
+  interpreting $t$ with $\Phi_i$).
+
+  If the refinement succeeds or is undetermined the next interpretation
+  $\Phi_{i+1}$ will be created \emph{making a choice}, that is replacing in the
+  current interpretation one of the metavariable appearing in $\Phi_i$ with one
+  of the possible choice from the corresponding disambiguation domain. The
+  metavariable to be replaced is chosen following a preorder visit of the
+  ambiguous term. Then, step 3 is attempted again with the new interpretation.
+    
+  If the refinement fails the current set of choices cannot lead to a well-typed
+  term and backtracking of the current interpretation is attempted.
+    
+ \item Once an unambiguous correct interpretation is found (i.e. $\Phi_i$ does
+  no longer contain any placeholder), backtracking is attempted anyway to find
+  the other correct interpretations.
+
+ \item Let $n$ be the number of interpretations who survived step 4. If $n=0$
+  signal a type error. If $n=1$ we have found exactly one (incomplete) CIC term
+  corresponding to the content level term $c$, returns it as output of the
+  disambiguation phase. If $n>1$ we have found many different (incomplete) CIC
+  terms which can correspond to the content level term, let the user choose one
+  of the $n$ interpretations and returns the corresponding term.
+
+\end{enumerate}
+
+The efficiency of this algorithm resides in the fact that as soon as an
+incomplete CIC term is not typable, no further instantiation of the
+metavariables of the corresponding interpretation is attemped.
+% For example, during the disambiguation of the user input
+% \texttt{\TEXMACRO{forall} x. x*0 = 0}, an interpretation $\Phi_i$ is
+% encountered which associates $?$ to the instance of \texttt{0} on the right,
+% the real number $0$ to the instance of \texttt{0} on the left, and the
+% multiplication over natural numbers (\texttt{mult} for short) to \texttt{*}.
+% The refiner will fail, since \texttt{mult} require a natural argument, and no
+% further instantiation of the placeholder will be tried.
+
+Details of the disambiguation algorithm along with an analysis of its complexity
+can be found in~\cite{disambiguation}, where a formulation without backtracking
+(corresponding to the actual \MATITA{} implementation) is also presented.
+
+\subsubsection{Disambiguation stages}
+
+\subsection{notazione}
+\label{sec:notation}
+\ASSIGNEDTO{zack}
  
  \subsection{mathml}
  \ASSIGNEDTO{zack}
  
-\subsection{metavariabili}
-\label{sec:metavariables}
-\ASSIGNEDTO{csc}
+\subsection{selezione semantica, cut paste, hyperlink}
+\ASSIGNEDTO{zack}
  
  \subsection{pattern}
  \ASSIGNEDTO{gares}\\
@@ -238,7 +767,7 @@ selection.
  Matita benefits of a graphical interface and a powerful MathML rendering
  widget that allows the user to select pieces of the sequent he is working
  on. While this is an extremely intuitive way for the user to
-restrict the application of a tactic, for example, to some subterms of the
+restrict the application of tactics, for example, to some subterms of the
  conclusion or some hypothesis, the way this action is recorded to the text
  script is not obvious.\\
  In \MATITA{} this issue is addressed by patterns.
@@ -246,12 +775,12 @@ In \MATITA{} this issue is addressed by patterns.
  \subsubsection{Pattern syntax}
  A pattern is composed of two terms: a $\NT{sequent\_path}$ and a
  $\NT{wanted}$.
-The former mocks-up a sequent, replacing unwanted subterms with $?$ and
+The former mocks-up a sequent, discharging unwanted subterms with $?$ and
  selecting the interesting parts with the placeholder $\%$. 
  The latter is a term that lives in the context of the placeholders.
  
  The concrete syntax is reported in table \ref{tab:pathsyn}
-\NOTE{uso nomi diversi \\dalla grammatica \\ ma che hanno + senso}
+\NOTE{uso nomi diversi dalla grammatica ma che hanno + senso}
  \begin{table}
   \caption{\label{tab:pathsyn} Concrete syntax of \MATITA{} patterns.\strut}
  \hrule
@@ -275,7 +804,7 @@ selects roots (subterms) of the sequent, using the
  $\NT{sequent\_path}$,  while the second 
  one searches the $\NT{wanted}$ term starting from these roots. Both are
  optional steps, and by convention the empty pattern selects the whole
-goal.
+conclusion.
  
  \begin{description}
  \item[Phase 1]
@@ -289,7 +818,7 @@ goal.
    assumption is selected. Remember that the user can be mostly
    unaware of this syntax, since the system is able to write down a 
    $\NT{sequent\_path}$ starting from a visual selection.
-  \NOTE{Questo ancora non va\\in matita}
+  \NOTE{Questo ancora non va in matita}
  
    A $\NT{multipath}$ is a CiC term in which a special constant $\%$
    is allowed.
@@ -349,15 +878,14 @@ hypothesis with $O + n$ and then use $inj\_plus\_l$ to prove $m=O$.
  \end{grafite}
  \noindent
  This pattern, that is a simple instance of the $\NT{sequent\_path}$
-grammar entry, acts on $H$ that has type (without notation) 
-$(eq~nat~(m+n)~n)$ and discharges the head of the application and the
-first two arguments with a $?$ and selects the last argument with
-$\%$. The syntax may seem uncomfortable, but the user can simply
-select with the mouse the right part of the equivalence and left to the
-system the burden of writing down in the script file the corresponding 
-pattern with $?$ and $\%$ in the right place (expecially where implicit
-arguments are hidden by the notation, like the type $nat$ in this
-example).
+grammar entry, acts on $H$ that has type (without notation) $(eq~nat~(m+n)~n)$
+and discharges the head of the application and the first two arguments with a
+$?$ and selects the last argument with $\%$. The syntax may seem uncomfortable,
+but the user can simply select with the mouse the right part of the equivalence
+and left to the system the burden of writing down in the script file the
+corresponding pattern with $?$ and $\%$ in the right place (that is not
+trivial, expecially where implicit arguments are hidden by the notation, like
+the type $nat$ in this example).
  
  Changing all the occurrences of $n$ in the hypothesis $H$ with $O+n$ 
  works too and can be done, by the experienced user, writing directly
@@ -380,12 +908,13 @@ following one, that is less readable but uses only the first phase.
  \noindent
  
  \subsubsection{Tactics supporting patterns}
-In \MATITA{} the following tactics can be restricted to subterms of
-the working sequent: simplify, change, fold, unfold, generalize, replace and
-rewrite.
-\NOTE{attualmente rewrite e \\ fold non supportano \\ phase 2. per
-supportarlo\\bisogna far loro trasformare\\il pattern phase1+phase2\\
-in un pattern phase1only\\come faccio nell'ultimo esempio.\\lo si fa
+In \MATITA{} all the tactics that can be restricted to subterm of the working
+sequent accept the pattern syntax. In particular these tactics are: simplify,
+change, fold, unfold, generalize, replace and rewrite.
+
+\NOTE{attualmente rewrite e fold non supportano phase 2. per
+supportarlo bisogna far loro trasformare il pattern phase1+phase2 
+in un pattern phase1only come faccio nell'ultimo esempio. lo si fa
  con una pattern\_of(select(pattern))}
  
  \subsubsection{Comparison with Coq}
@@ -406,7 +935,7 @@ occurce of it (counting from left to right). In our previous example,
  to change only the left part of the equivalence, the correct command
  is
  \begin{grafite}
-  change n at 2 in H with (O + H)
+  change n at 2 in H with (O + n)
  \end{grafite} 
  \noindent
  meaning that in the hypothesis $H$ the $n$ we want to change is the
@@ -438,353 +967,121 @@ but for example not for change and other tactics that do not relay on
  unification. 
  
  The idea behind this way of identifying subterms in not really far
-from the idea behind patterns patterns, but really fails in help using
-complex notation, since it relays on the way the user sees the
-sequent.  Notation can swap arguments, or place them upside-down or
-even put them inside a bidimensional matrix.  In these cases using the
-mouse to select the wanted term is probably the only way to tell the
-system exactly what you want to do. 
+from the idea behind patterns, but really fails in extending to
+complex notation, since it relays on a mono-dimensional sequent representation.
+Real math notation places arguments upside-down (like in indexed sums or
+integrations) or even puts them inside a bidimensional matrix.  
+In these cases using the mouse to select the wanted term is probably the 
+only way to tell the system exactly what you want to do. 
  
  One of the goals of \MATITA{} is to use modern publishing techiques, and
  adopting a method for restricting tactics application domain that discourages 
  using heavy math notation, would definitively be a bad choice.
  
-\subsection{tatticali}
-\ASSIGNEDTO{gares}
-
-\subsection{Disambiguation}
-\label{sec:disambiguation}
-\ASSIGNEDTO{zack}
+\subsection{Tacticals}
+\ASSIGNEDTO{gares}\\
+There are mainly two kinds of languages used by proof assistants to recorder
+proofs: tactic based and declarative. We will not investigate the philosophy
+aroud the choice that many proof assistant made, \MATITA{} included, and we will not compare the two diffrent approaches. We will describe the common issues of the first one and how \MATITA{} tries to solve them.
+
+First we must highlight the fact that proof scripts made using tactis are
+particularly unreadable. This is not a big deal for the user while he is
+constructing the proof, but is considerably a problem when he tries to reread
+what he did or when he shows his work to someone else.
+
+Another common issue for tactic based proof scripts is their mantenibility.
+Huge libraries have been developed, and backward compatibility is a really time
+consuming task. This problem is usually ameliorated with tacticals, that
+help in structuring proofs and consequently their maintenance, but have a bad
+counterpart in script readability.  Since tacticals are executed atomically,
+the common practice of executing again a script to review all the proof steps
+doesn't work at all. This issue in addition to the really poor feeling that a
+list of tactics gives about the proof makes script rereading particularly hard.
+
+\MATITA{} uses a language of tactics and tacticals, but adopts a peculiar
+strategy to make this technique more user friendly without loosing in
+mantenibility or expressivity.
+
+\subsubsection{Tacticals overview}
+Before describing the peculiarities of \MATITA{} tacticals we briefly introduce what tacticals are and where they can be useful.
+
+Tacticals first appered in LCF(cita qualcosa) and can be seen as programming
+constructs, like looping, branching, error recovery or sequential composition.
+For example $tac_1~.~tac_2$ executes the first tactic and applies the second
+only to the first goal opened by $tac_1$. Baranching can be used to specify a
+diffrent tactic to apply to each new goal opened by another tactic, for example
+$tac_1\verb+;[+~tac_{1.1}~\verb+|+~tac_{1.2}~\verb+|+~\cdots~|~tac_{1.n}~\verb+]+$
+applies respectively $tac_{1.i}$ to the $i$-th goal opened by $tac_1$. Looping
+can be used to iterate a tactic until it works: $\verb+repeat+~tac$ applies
+$tac$ to the current goal, and again $tac$ to the eventually resulting goals
+until all goal are closed or the tactic fails.
  
  \begin{table}
- \caption{\label{tab:termsyn} Concrete syntax of CIC terms: built-in
- notation\strut}
+ \caption{\label{tab:tacsyn} Concrete syntax of \MATITA{} tacticals.\strut}
  \hrule
  \[
  \begin{array}{@{}rcll@{}}
-  \NT{term} & ::= & & \mbox{\bf terms} \\
-    &     & x & \mbox{(identifier)} \\
-    &  |  & n & \mbox{(number)} \\
-    &  |  & s & \mbox{(symbol)} \\
-    &  |  & \mathrm{URI} & \mbox{(URI)} \\
-    &  |  & \verb+_+ & \mbox{(implicit)}\TODO{sync} \\
-    &  |  & \verb+?+n~[\verb+[+~\{\NT{subst}\}~\verb+]+] & \mbox{(meta)} \\
-    &  |  & \verb+let+~\NT{ptname}~\verb+\def+~\NT{term}~\verb+in+~\NT{term} \\
-    &  |  & \verb+let+~\NT{kind}~\NT{defs}~\verb+in+~\NT{term} \\
-    &  |  & \NT{binder}~\{\NT{ptnames}\}^{+}~\verb+.+~\NT{term} \\
-    &  |  & \NT{term}~\NT{term} & \mbox{(application)} \\
-    &  |  & \verb+Prop+ \mid \verb+Set+ \mid \verb+Type+ \mid \verb+CProp+ & \mbox{(sort)} \\
-    &  |  & \verb+match+~\NT{term}~ & \mbox{(pattern matching)} \\
-    &     & ~ ~ [\verb+[+~\verb+in+~x~\verb+]+]
-             ~ [\verb+[+~\verb+return+~\NT{term}~\verb+]+] \\
-    &     & ~ ~ \verb+with [+~[\NT{rule}~\{\verb+|+~\NT{rule}\}]~\verb+]+ & \\
-    &  |  & \verb+(+~\NT{term}~\verb+:+~\NT{term}~\verb+)+ & \mbox{(cast)} \\
-    &  |  & \verb+(+~\NT{term}~\verb+)+ \\
-  \NT{defs}  & ::= & & \mbox{\bf mutual definitions} \\
-    &     & \NT{fun}~\{\verb+and+~\NT{fun}\} \\
-  \NT{fun} & ::= & & \mbox{\bf functions} \\
-    &     & \NT{arg}~\{\NT{ptnames}\}^{+}~[\verb+on+~x]~\verb+\def+~\NT{term} \\
-  \NT{binder} & ::= & & \mbox{\bf binders} \\
-    &     & \verb+\forall+ \mid \verb+\lambda+ \\
-  \NT{arg} & ::= & & \mbox{\bf single argument} \\
-    &     & \verb+_+ \mid x \\
-  \NT{ptname} & ::= & & \mbox{\bf possibly typed name} \\
-    &     & \NT{arg} \\
-    &  |  & \verb+(+~\NT{arg}~\verb+:+~\NT{term}~\verb+)+ \\
-  \NT{ptnames} & ::= & & \mbox{\bf bound variables} \\
-    &     & \NT{arg} \\
-    &  |  & \verb+(+~\NT{arg}~\{\verb+,+~\NT{arg}\}~[\verb+:+~\NT{term}]~\verb+)+ \\
-  \NT{kind} & ::= & & \mbox{\bf induction kind} \\
-    &     & \verb+rec+ \mid \verb+corec+ \\
-  \NT{rule} & ::= & & \mbox{\bf rules} \\
-    &     & x~\{\NT{ptname}\}~\verb+\Rightarrow+~\NT{term}
+  \NT{punctuation} & 
+    ::= & \SEMICOLON \quad|\quad \DOT \quad|\quad \SHIFT \quad|\quad \BRANCH \quad|\quad \MERGE \quad|\quad \POS{\mathrm{NUMBER}~} & \\
+  \NT{block\_kind} & 
+    ::= & \verb+focus+ ~|~ \verb+try+ ~|~ \verb+solve+ ~|~ \verb+first+ ~|~ \verb+repeat+ ~|~ \verb+do+~\mathrm{NUMBER} & \\
+  \NT{block\_delimiter} & 
+    ::= & \verb+begin+ ~|~ \verb+end+ & \\
+  \NT{tactical} & 
+    ::= & \verb+skip+ ~|~ \NT{tactic} ~|~ \NT{block\_delimiter} ~|~ \NT{block\_kind} ~|~ \NT{punctuation} ~|~& \\
  \end{array}
  \]
  \hrule
  \end{table}
  
-\subsubsection{Term input}
-
-The primary form of user interaction employed by \MATITA{} is textual script
-editing: the user modifies it and evaluate step by step its composing
-\emph{statements}. Examples of statements are inductive type definitions,
-theorem declarations, LCF-style tacticals, and macros (e.g. \texttt{Check} can
-be used to ask the system to refine a given term and pretty print the result).
-Since many statements refer to terms of the underlying calculus, \MATITA{} needs
-a concrete syntax able to encode terms of the Calculus of Inductive
-Constructions.
-
-Two of the requirements in the design of such a syntax are apparently in
-contrast:
-\begin{enumerate}
- \item the syntax should be as close as possible to common mathematical practice
-  and implement widespread mathematical notations;
- \item each term described by the syntax should be non-ambiguous meaning that it
-  should exists a function which associates to it a CIC term.
-\end{enumerate}
-
-These two requirements are addressed in \MATITA{} by the mean of two mechanisms
-which work together: \emph{term disambiguation} and \emph{extensible notation}.
-Their interaction is visible in the architecture of the \MATITA{} input phase,
-depicted in Fig.~\ref{fig:inputphase}. The architecture is articulated as a
-pipline of three levels: the concrete syntax level (level 0) is the one the user
-has to deal with when inserting CIC terms; the abstract syntax level (level 2)
-is an internal representation which intuitively encodes mathematical formulae at
-the content level~\cite{adams}\cite{mkm-structure}; the last level is that of
-CIC terms.
-
-\begin{figure}[ht]
- \begin{center}
-  \includegraphics[width=0.9\textwidth]{input_phase}
-  \caption{\MATITA{} input phase}
- \end{center}
- \label{fig:inputphase}
-\end{figure}
-
-Requirement (1) is addressed by a built-in concrete syntax for terms, described
-in Tab.~\ref{tab:termsyn}, and the extensible notation mechanisms which offers a
-way for extending available mathematical notations. Extensible notation, which
-is also in charge of providing a parsing function mapping concrete syntax terms
-to content level terms, is described in Sect.~\ref{sec:notation}.  Requirement
-(2) is addressed by the conjunct action of that parsing function and
-disambiguation which provides a function from content level terms to CIC terms.
-
-\subsubsection{Sources of ambiguity}
-
-The translation from content level terms to CIC terms is not straightforward
-because some nodes of the content encoding admit more that one CIC encoding,
-invalidating requirement (2).
-
-\begin{example}
- \label{ex:disambiguation}
-
- Consider the term at the concrete syntax level \texttt{\TEXMACRO{forall} x. x +
- ln 1 = x} of Fig.~\ref{fig:inputphase}(a), it can be the type of a lemma the
- user may want to prove. Assuming that both \texttt{+} and \texttt{=} are parsed
- as infix operators, all the following questions are legitimate and must be
- answered before obtaining a CIC term from its content level encoding
- (Fig.~\ref{fig:inputphase}(b)):
-
- \begin{enumerate}
-
-  \item Since \texttt{ln} is an unbound identifier, which CIC constants does it
-   represent? Many different theorems in the library may share its (rather
-   short) name \dots
-
-  \item Which kind of number (\IN, \IR, \dots) the \texttt{1} literal stand for?
-   Which encoding is used in CIC to represent it? E.g., assuming $1\in\IN$, is
-   it an unary or a binary encoding?
-
-  \item Which kind of equality the ``='' node represents? Is it Leibniz's
-   polymorhpic equality? Is it a decidable equality over \IN, \IR, \dots?
-
- \end{enumerate}
-
-\end{example}
-
-In \MATITA, three \emph{sources of ambiguity} are admitted for content level
-terms: unbound identifiers, literal numbers, and operators. Each instance of
-ambiguity sources (ambiguous entity) occuring in a content level term is
-associated to a \emph{disambiguation domain}. Intuitively a disambiguation
-domain is a set of CIC terms which may be replaced for an ambiguous entity
-during disambiguation. Each item of the domain is said to be an
-\emph{interpretation} for the ambiguous entity.
-
-\emph{Unbound identifiers} (question 1) are ambiguous entities since the
-namespace of CIC objects is not flat and the same identifier may denote many
-ofthem. For example the short name \texttt{plus\_assoc} in the \HELM{} library
-is shared by three different theorems stating the associative property of
-different additions.  This kind of ambiguity is avoidable if the user is willing
-to use long names (in form of URIs in the \texttt{cic://} scheme) in the
-concrete syntax, with the obvious drawbacks of obtaining long and unreadable
-terms.
-
-Given an unbound identifier, the corresponding disambiguation domain is computed
-querying the library for all constants, inductive types, and inductive type
-constructors having it as their short name (see the \LOCATE{} query in
-Sect.~\ref{sec:metadata}).
-
-\emph{Literal numbers} (question 2) are ambiguous entities as well since
-different kinds of numbers can be encoded in CIC (\IN, \IR, \IZ, \dots) using
-different encodings. Considering the restricted example of natural numbers we
-can for instance encode them in CIC using inductive datatypes with a number of
-constructor equal to the encoding base plus 1, obtaining one encoding for each
-base.
-
-For each possible way of mapping a literal number to a CIC term, \MATITA{} is
-aware of a \emph{number intepretation function} which, when applied to the
-natural number denoted by the literal\footnote{at the moment only literal
-natural number are supported in the concrete syntax} returns a corresponding CIC
-term. The disambiguation domain for a given literal number is built applying to
-the literal all available number interpretation functions in turn.
-
-Number interpretation functions can be defined in OCaml or directly using
-\TODO{notazione per i numeri}.
-
-\emph{Operators} (question 3) are intuitively head of applications, as such they
-are always applied to a non empty sequence of arguments. Their ambiguity is a
-need since it is often the case that some notation is used in an overloaded
-fashion to hide the use of different CIC constants which encodes similar
-concepts. For example, in the standard library of \MATITA{} the infix \texttt{+}
-notation is available building a binary \texttt{Op(+)} node, whose
-disambiguation domain may refer to different constants like the addition over
-natural numbers \URI{cic:/matita/nat/plus/plus.con} or that over real numbers of
-the \COQ{} standard library \URI{cic:/Coq/Reals/Rdefinitions/Rplus.con}.
-
-For each possible way of mapping an operator application to a CIC term,
-\MATITA{} knows an \emph{operator interpretation function} which, when applied
-to an operator and its arguments, returns a CIC term. The disambiguation domain
-for a given operator is built applying to the operator and its arguments all
-available operator interpretation functions in turn.
-
-Operator interpretation functions could be added using the
-\texttt{interpretation} statement. For example, among the first line of the
-script \FILE{matita/library/logic/equality.ma} from the \MATITA{} standard
-library we read:
-
-\begin{grafite}
-interpretation "leibnitz's equality"
- 'eq x y =
-   (cic:/matita/logic/equality/eq.ind#xpointer(1/1) _ x y).
-\end{grafite}
-
-Evaluating it in \MATITA{} will add an operator interpretation function for the
-binary operator \texttt{eq} which expands to the CIC term on the right hand side
-of the statement. That CIC term can be written using only built-in concrete
-syntax, can contain no ambiguity source; still, it can refer to operator
-arguments bound on the left hand side and can contain implicit terms (denoted
-with \texttt{\_}) which will be expanded to fresh metavariables. The latter
-feature is used in the example above for the first argument of Leibniz's
-polymorhpic equality.
-
-\subsubsection{Disambiguation algorithm}
-
-\NOTE{assumo\\
-      che si sia\\
-      gia' parlato\\
-      di refine}
-
+\MATITA{} tacticals syntax is reported in table \ref{tab:tacsyn}.
+While one whould expect to find structured constructs like 
+$\verb+do+~n~\NT{tactic}$ the syntax allows pieces of tacticals to be written.
+This is essential for base idea behind matita tacticals: step-by-step execution.
  
-A \emph{disambiguation algorithm} takes as input a content level term and return
-a fully determined CIC term. The key observation on which a disambiguation
-algorithm is based is that given a content level term with more than one sources
-of ambiguity, not all possible combination of interpretation lead to a typable
-CIC term. In the term of Ex.~\ref{ex:disambiguation} for instance the
-interpretation of \texttt{ln} as a function from \IR to \IR and the
-interpretation of \texttt{1} as the Peano number $1$ can't coexists. The notion
-of ``can't coexists'' in the disambiguation of \MATITA{} is inherited from the
-refiner described in Sect.~\ref{sec:metavariables}: as long as
-$\mathit{refine}(c)\neq\bot$, the combination of interpretation which led to $c$
-can coexists.
-
-The \emph{naive disambiguation algorithm} takes as input a content level term
-$t$ and proceeds as follows:
-
-\begin{enumerate}
-
- \item Create disambiguation domains $\{D_i | i\in\mathit{Dom}(t)\}$, where
-  $\mathit{Dom}(t)$ is the set of ambiguity sources of $t$. Each $D_i$ is a set
-  of CIC terms and can be built as described above.
-
- \item Let $\Phi = \{\phi_i | {i\in\mathit{Dom}(t)},\phi_i\in D_i\}$ be an
-  interpretation for $t$. Given $t$ and an interpretation $\Phi$, a CIC term is
-  fully determined. Iterate over all possible interpretations of $t$ and refine
-  the corresponding CIC terms, keep only interpretations which lead to CIC terms
-  $c$ s.t. $\mathit{refine}(c)\neq\bot$ (i.e. interpretations that determine
-  typable terms).
-
- \item Let $n$ be the number of interpretations who survived step 2. If $n=0$
-  signal a type error. If $n=1$ we have found exactly one CIC term corresponding
-  to $t$, returns it as output of the disambiguation phase. If $n>1$ we have
-  found many different CIC terms which can correspond to the content level term,
-  let the user choose one of the $n$ interpretations and returns the
-  corresponding term.
-
-\end{enumerate}
-
-The above algorithm is highly inefficient since the number of possible
-interpretations $\Phi$ grows exponentially with the number of ambiguity sources.
-The actual algorithm used in \MATITA{} is far more efficient being, in the
-average case, linear in the number of ambiguity sources.
-
-\TODO{FINQUI}
-
-The efficient algorithm can be applied if the logic can be extended with
-metavariables and a refiner can be implemented. This is the case for CIC and
-several other logics.
-\emph{Metavariables}~\cite{munoz} are typed, non linear placeholders that can
-occur in terms; $?_i$ usually denotes the $i$-th metavariable, while $?$ denotes
-a freshly created metavariable. A \emph{refiner}~\cite{McBride} is a
-function whose input is a term with placeholders and whose output is either a
-new term obtained instantiating some placeholder or $\epsilon$, meaning that no
-well typed instantiation could be found for the placeholders occurring in
-the term (type error).
-
-The efficient algorithm starts with an interpretation $\Phi_0 = \{\phi_i |
-\phi_i = ?, i\in\mathit{Dom}(t)\}$,
-which associates a fresh metavariable to each
-source of ambiguity. Then it iterates refining the current CIC term (i.e. the
-term obtained interpreting $t$ with $\Phi_i$). If the refinement succeeds the
-next interpretation $\Phi_{i+1}$ will be created \emph{making a choice}, that is
-replacing a placeholder with one of the possible choice from the corresponding
-disambiguation domain. The placeholder to be replaced is chosen following a
-preorder visit of the ambiguous term. If the refinement fails the current set of
-choices cannot lead to a well-typed term and backtracking is attempted.
-Once an unambiguous correct interpretation is found (i.e. $\Phi_i$ does no
-longer contain any placeholder), backtracking is attempted
-anyway to find the other correct interpretations.
-
-The intuition which explain why this algorithm is more efficient is that as soon
-as a term containing placeholders is not typable, no further instantiation of
-its placeholders could lead to a typable term. For example, during the
-disambiguation of user input \texttt{\TEXMACRO{forall} x. x*0 = 0}, an
-interpretation $\Phi_i$ is encountered which associates $?$ to the instance
-of \texttt{0} on the right, the real number $0$ to the instance of \texttt{0} on
-the left, and the multiplication over natural numbers (\texttt{mult} for short)
-to \texttt{*}. The refiner will fail, since \texttt{mult} require a natural
-argument, and no further instantiation of the placeholder will be tried.
-
-If, at the end of the disambiguation, more than one possible interpretations are
-possible, the user will be asked to choose the intended one (see
-Fig.~\ref{fig:disambiguation}).
-
-\begin{figure}[htb]
-%   \centerline{\includegraphics[width=0.9\textwidth]{disambiguation-1}}
-  \caption{\label{fig:disambiguation} Disambiguation: interpretation choice}
-\end{figure}
+\subsubsection{\MATITA{} Tinycals}
+The low-level tacticals implementation of \MATITA{} allows a step-by-step execution of a tactical, that substantially means that a $\NT{block\_kind}$ is not executed as an atomic operation. This has two major benefits for the user, even being a so simple idea:
+\begin{description}
+\item[Proof structuring] 
+  is much easyer. Consider for example a proof by induction, and imagine you are using classical tacticals. After applying the
+  induction principle, with one step tacticals, you have to choose: structure
+  the proof or not. If you decide for the former you have to branch with
+  \verb+[+ and write tactics for all the cases separated by \verb+|+ and the
+  close the tactical with
+  \verb+]+. You can replace most of the cases by the identity tactic just to
+  concentrate only on the first goal, but you will have to go one step back and
+  one further every time you add something inside the tactical. And if you are
+  boared of doing so, you will finish in giving up structuring the proof and
+  write a plain list of tactics.\\
+  With step-by-step tacticals you can apply the induction principle, and just
+  open the branching tactical \verb+[+. Then you can interact with the system
+  reaching a proof of the first case, without having to specify the whole
+  branching tactical. When you have proved all the induction cases, you close
+  the branching tactical with \verb+]+ and you are done with a structured proof.
+\item[Rereading]
+  is possible. Going on step by step shows exactly what is going on.
+  Consider again a proof by induction, that starts applying the induction
+  principle and suddenly baranches with a \verb+[+. This clearly separates all
+  the induction cases, but if the square brackets content is executed in one
+  single step you completely loose the possibility of rereading it. Again,
+  executing step-by-step is the way you whould like to review the
+  demonstration. Remember tha understandig the proof from the script is not
+  easy, and only the execution of tactics (and the resulting transformed goal)
+  gives you the feeling of what is goning on.
+\end{description}
  
-Details of the disambiguation algorithm of \WHELP{} can
-be found in~\cite{disambiguation}, where an equivalent algorithm
-that avoids backtracking is also presented.
  
-\subsection{notazione}
-\label{sec:notation}
-\ASSIGNEDTO{zack}
  
-\subsection{libreria tutta visibile}
+\subsection{named variable e disambiguazione lazy}
  \ASSIGNEDTO{csc}
  
-\subsection{ricerca e indicizzazione}
-\label{sec:metadata}
-\ASSIGNEDTO{andrea}
-
-\subsection{auto}
-\ASSIGNEDTO{andrea}
-
-\subsection{xml / gestione della libreria}
-\ASSIGNEDTO{gares}
-
-\subsection{named variable}
+\subsection{metavariabili}
+\label{sec:metavariables}
  \ASSIGNEDTO{csc}
  
-\subsection{assenza di proof tree / resa in linguaggio naturale}
-\ASSIGNEDTO{andrea}
+\begin{verbatim}
  
-\subsection{selezione semantica, cut paste, hyperlink}
-\ASSIGNEDTO{zack}
-
-\subsection{sostituzioni esplicite vs moduli}
-\ASSIGNEDTO{csc}
+\end{verbatim}
  
  \section{Drawbacks, missing, \dots}
  
@@ -800,14 +1097,17 @@ that avoids backtracking is also presented.
  \subsection{localizzazione errori}
  \ASSIGNEDTO{}
  
-\textbf{Acknowledgements}
+\acknowledgements
  We would like to thank all the students that during the past
  five years collaborated in the \HELM{} project and contributed to 
  the development of Matita, and in particular
  A.Griggio, F.Guidi, P. Di Lena, L.Padovani, I.Schena, M.Selmi, 
  V.Tamburrelli.
  
+\theendnotes
+
  \bibliography{matita}
  
+
  \end{document}