helm/papers/matita/matita.tex

   1 \documentclass{kluwer}
   2 \usepackage{color}
   3 \usepackage{graphicx}
   4 % \usepackage{amssymb,amsmath}
   5 \usepackage{hyperref}
   6 % \usepackage{picins}
   7 \usepackage{color}
   8 \usepackage{fancyvrb}
   9 \usepackage[show]{ed}
  10
  11 \definecolor{gray}{gray}{0.85}
  12 %\newcommand{\logo}[3]{
  13 %\parpic(0cm,0cm)(#2,#3)[l]{\includegraphics[width=#1]{whelp-bw}}
  14 %}
  15
  16 \newcommand{\AUTO}{\textsc{Auto}}
  17 \newcommand{\COQ}{Coq}
  18 \newcommand{\ELIM}{\textsc{Elim}}
  19 \newcommand{\HELM}{Helm}
  20 \newcommand{\HINT}{\textsc{Hint}}
  21 \newcommand{\IN}{\ensuremath{\dN}}
  22 \newcommand{\INSTANCE}{\textsc{Instance}}
  23 \newcommand{\IR}{\ensuremath{\dR}}
  24 \newcommand{\IZ}{\ensuremath{\dZ}}
  25 \newcommand{\LIBXSLT}{LibXSLT}
  26 \newcommand{\LOCATE}{\textsc{Locate}}
  27 \newcommand{\MATCH}{\textsc{Match}}
  28 \newcommand{\MATITA}{Matita}
  29 \newcommand{\METAHEADING}{Symbol & Position \\ \hline\hline}
  30 \newcommand{\MOWGLI}{MoWGLI}
  31 \newcommand{\NAT}{\ensuremath{\mathit{nat}}}
  32 \newcommand{\NATIND}{\mathit{nat\_ind}}
  33 \newcommand{\NUPRL}{NuPRL}
  34 \newcommand{\OCAML}{OCaml}
  35 \newcommand{\PROP}{\mathit{Prop}}
  36 \newcommand{\REF}[3]{\ensuremath{\mathit{Ref}_{#1}(#2,#3)}}
  37 \newcommand{\TEXMACRO}[1]{\texttt{\char92 #1}}
  38 \newcommand{\UWOBO}{UWOBO}
  39 \newcommand{\WHELP}{Whelp}
  40 \newcommand{\DOT}{\ensuremath{\mbox{\textbf{.}}}}
  41 \newcommand{\SEMICOLON}{\ensuremath{\mbox{\textbf{;}}}}
  42 \newcommand{\BRANCH}{\ensuremath{\mbox{\textbf{[}}}}
  43 \newcommand{\SHIFT}{\ensuremath{\mbox{\textbf{\textbar}}}}
  44 \newcommand{\POS}[1]{\ensuremath{#1\mbox{\textbf{:}}}}
  45 \newcommand{\MERGE}{\ensuremath{\mbox{\textbf{]}}}}
  46 \newcommand{\FOCUS}[1]{\ensuremath{\mathtt{focus}~#1}}
  47 \newcommand{\UNFOCUS}{\ensuremath{\mathtt{unfocus}}}
  48 \newcommand{\SKIP}{\MATHTT{skip}}
  49 \newcommand{\TACTIC}[1]{\ensuremath{\mathtt{tactic}~#1}}
  50
  51 \definecolor{gray}{gray}{0.85} % 1 -> white; 0 -> black
  52 \newcommand{\NT}[1]{\langle\mathit{#1}\rangle}
  53 \newcommand{\URI}[1]{\texttt{#1}}
  54
  55 %{\end{SaveVerbatim}\setlength{\fboxrule}{.5mm}\setlength{\fboxsep}{2mm}%
  56 \newenvironment{grafite}{\VerbatimEnvironment
  57  \begin{SaveVerbatim}{boxtmp}}%
  58  {\end{SaveVerbatim}\setlength{\fboxsep}{3mm}%
  59   \begin{center}
  60    \fcolorbox{black}{gray}{\BUseVerbatim[boxwidth=0.9\linewidth]{boxtmp}}
  61   \end{center}}
  62
  63 \newcounter{example}
  64 \newenvironment{example}{\stepcounter{example}\vspace{0.5em}\noindent\emph{Example} \arabic{example}.}
  65  {}
  66 \newcommand{\ASSIGNEDTO}[1]{\textbf{Assigned to:} #1}
  67 \newcommand{\FILE}[1]{\texttt{#1}}
  68 % \newcommand{\NOTE}[1]{\ifodd \arabic{page} \else \hspace{-2cm}\fi\ednote{#1}}
  69 \newcommand{\NOTE}[1]{\ednote{#1}{foo}}
  70 \newcommand{\TODO}[1]{\textbf{TODO: #1}}
  71
  72 \newsavebox{\tmpxyz}
  73 \newcommand{\sequent}[2]{
  74   \savebox{\tmpxyz}[0.9\linewidth]{
  75     \begin{minipage}{0.9\linewidth}
  76       \ensuremath{#1} \\
  77       \rule{3cm}{0.03cm}\\
  78       \ensuremath{#2}
  79     \end{minipage}}\setlength{\fboxsep}{3mm}%
  80   \begin{center}
  81    \fcolorbox{black}{gray}{\usebox{\tmpxyz}}
  82   \end{center}}
  83
  84 \bibliographystyle{plain}
  85
  86 \begin{document}
  87
  88 \begin{opening}
  89
  90  \title{The \MATITA{} Proof Assistant}
  91
  92 \author{Andrea \surname{Asperti} \email{asperti@cs.unibo.it}}
  93 \author{Claudio \surname{Sacerdoti Coen} \email{sacerdot@cs.unibo.it}}
  94 \author{Enrico \surname{Tassi} \email{tassi@cs.unibo.it}}
  95 \author{Stefano \surname{Zacchiroli} \email{zacchiro@cs.unibo.it}}
  96 \institute{Department of Computer Science, University of Bologna\\
  97  Mura Anteo Zamboni, 7 --- 40127 Bologna, ITALY}
  98
  99 \runningtitle{The Matita proof assistant}
 100 \runningauthor{Asperti, Sacerdoti Coen, Tassi, Zacchiroli}
 101
 102 % \date{data}
 103
 104 \begin{motto}
 105 ``We are nearly bug-free'' -- \emph{CSC, Oct 2005}
 106 \end{motto}
 107
 108 \begin{abstract}
 109  abstract qui
 110 \end{abstract}
 111
 112 \keywords{Proof Assistant, Mathematical Knowledge Management, XML, Authoring,
 113 Digital Libraries}
 114
 115 \end{opening}
 116
 117 \section{Introduction}
 118 \label{sec:intro}
 119 {\em Matita} is the proof assistant under development by the \HELM{} team
 120 \cite{mkm-helm} at the University of Bologna, under the direction of
 121 Prof.~Asperti.
 122 The origin of the system goes back to 1999. At the time we were mostly
 123 interested to develop tools and techniques to enhance the accessibility
 124 via web of formal libraries of mathematics. Due to its dimension, the
 125 library of the \COQ{} proof assistant (of the order of 35'000 theorems)
 126 was choosed as a privileged test bench for our work, although experiments
 127 have been also conducted with other systems, and notably with \NUPRL{}.
 128 The work, mostly performed in the framework of the recently concluded
 129 European project IST-33562 \MOWGLI{}~\cite{pechino}, mainly consisted in the
 130 following teps:
 131 \begin{itemize}
 132 \item exporting the information from the internal representation of
 133  \COQ{} to a system and platform independent format. Since XML was at the
 134 time an emerging standard, we naturally adopted this technology, fostering
 135 a content-based architecture for future system, where the documents
 136 of the library were the the main components around which everything else
 137 has to be build;
 138 \item developing indexing and searching techniques supporting semantic
 139  queries to the library; these efforts gave birth to our \WHELP{}
 140 search engine, described in~\cite{whelp};
 141 \item developing languages and tools for a high-quality notational
 142 rendering of mathematical information; in particular, we have been
 143 active in the MathML Working group since 1999, and developed inside
 144 \HELM{} a MathML-compliant widget for the GTK graphical environment
 145 which can be integrated in any application.
 146 \end{itemize}
 147 The exportation issue, extensively discussed in \cite{exportation-module},
 148 has several major implications worth to be discussed.
 149
 150 The first
 151 point concerns the kind of content information to be exported. In a
 152 proof assistant like \COQ{}, proofs are represented in at least three clearly
 153 distinguishable formats: \emph{scripts} (i.e. sequences of commands issued by the
 154 user to the system during an interactive session of proof), \emph{proof objects}
 155 (which is the low-level representation of proofs in the form of
 156 lambda-terms readable to and checked by kernel) and \emph{proof-trees} (which
 157 is a kind of intermediate representation, vaguely inspired by a sequent
 158 like notation, that inherits most of the defects but essentially
 159 none of the advantages of the previous representations).
 160 Partially related to this problem, there is the
 161 issue of the {\em granularity} of the library: scripts usually comprise
 162 small developments with many definitions and theorems, while
 163 proof objects correspond to individual mathematical items.
 164
 165 In our case, the choice of the content encoding was eventually dictated
 166 by the methodological assumption of offering the information in a
 167 stable and system-independent format. The language of scripts is too
 168 oriented to \COQ, and it changes too rapidly to be of any interest
 169 to third parties. On the other side, the language of proof objects
 170 merely depend on
 171 the logical framework (the Calculus of Inductive Constructions, in
 172 the case of \COQ), is grammatically simple, semantically clear and,
 173 especially, is very stable (as kernels of proof assistants
 174 often are).
 175 So the granularity of the library is at the level of individual
 176 objects, that also justifies from another point of view the need
 177 for efficient searching techniques for retrieving individual
 178 logical items from the repository.
 179
 180 The main (possibly only) problem with proof objects is that they are
 181 difficult to read and do not directly correspond to what the user typed
 182 in. An analogy frequently made in the proof assistant community is that of
 183 comparing the vernacular language of scripts to a high level source language
 184 and lambda terms to the assembly language they are compiled in. We do not
 185 share this view and prefer to look at scripts as an imperative language,
 186 and to lambda terms as their denotational semantics; still, however,
 187 denotational semantics is possibly more formal but surely not more readable
 188 than the imperative source.
 189
 190 For all the previous reasons, a huge amount of work inside \MOWGLI{} has
 191 been devoted to automatic reconstruction of proofs in natural language
 192 from lambda terms. Since lambda terms are in close connection
 193 with natural deduction
 194 (that is still the most natural logical language discovered so far)
 195 the work is not hopeless as it may seem, especially if rendering
 196 is combined, as in our case, with dynamic features supporting
 197 in-line expansions or contractions of subproofs. The final
 198 rendering is probably not entirely satisfactory (see \cite{ida} for a
 199 discussion), but surely
 200 readable (the actual quality largely depends by the way the lambda
 201 term is written).
 202
 203 Summing up, we already disposed of the following tools/techniques:
 204 \begin{itemize}
 205 \item XML specifications for the Calculus of Inductive Constructions,
 206 with tools for parsing and saving mathematical objects in such a format;
 207 \item metadata specifications and tools for indexing and querying the
 208 XML knowledge base;
 209 \item a proof checker (i.e. the {\em kernel} of a proof assistant),
 210  implemented to check that we exported form the \COQ{} library all the
 211 logically relevant content;
 212 \item a sophisticated parser (used by the search engine), able to deal
 213 with potentially ambiguous and incomplete information, typical of the
 214 mathematical notation \cite{};
 215 \item a {\em refiner}, i.e. a type inference system, based on complex
 216 existential variables, used by the disambiguating parser;
 217 \item complex transformation algorithms for proof rendering in natural
 218 language;
 219 \item an innovative rendering widget, supporting high-quality bidimensional
 220 rendering, and semantic selection, i.e. the possibility to select semantically
 221 meaningful rendering expressions, and to past the respective content into
 222 a different text area.
 223 \NOTE{il widget non ha sel semantica}
 224 \end{itemize}
 225 Starting from all this, the further step of developing our own
 226 proof assistant was too
 227 small and too tempting to be neglected. Essentially, we ``just'' had to
 228 add an authoring interface, and a set of functionalities for the
 229 overall management of the library, integrating everything into a
 230 single system. \MATITA{} is the result of this effort.
 231
 232 At first sight, \MATITA{} looks as (and partly is) a \COQ{} clone. This is
 233 more the effect of the circumstances of its creation described
 234 above than the result of a deliberate design. In particular, we
 235 (essentially) share the same foundational dialect of \COQ{} (the
 236 Calculus of Inductive Constructions), the same implementative
 237 language (\OCAML{}), and the same (script based) authoring philosophy.
 238 However, as we shall see, the analogy essentially stops here.
 239
 240 In a sense; we like to think of \MATITA{} as the way \COQ{} would
 241 look like if entirely rewritten from scratch: just to give an
 242 idea, although \MATITA{} currently supports almost all functionalities of
 243 \COQ{}, it links 60'000 lins of \OCAML{} code, against ... of \COQ{} (and
 244 we are convinced that, starting from scratch again, we could furtherly
 245 reduce our code in sensible way).\NOTE{righe \COQ{}}
 246
 247 \begin{itemize}
 248  \item scelta del sistema fondazionale
 249  \item sistema indipendente (da Coq)
 250   \begin{itemize}
 251    \item possibilit\`a di sperimentare (soluzioni architetturali, logiche,
 252     implementative, \dots)
 253    \item compatibilit\`a con sistemi legacy
 254   \end{itemize}
 255 \end{itemize}
 256
 257 \section{\HELM{} library(??)}
 258
 259 \subsection{libreria tutta visibile}
 260 \ASSIGNEDTO{csc}
 261 \NOTE{assumo che si sia gia' parlato di approccio content-centrico}
 262 Our commitment to the content-centric view of the architecture of the system
 263 has important consequences on the user's experience and on the functionalities
 264 of several components of \MATITA. In the content-centric view the library
 265 of mathematical knowledge is an already existent and completely autonomous
 266 entity that we are allowed to exploit and augment using \MATITA. Thus, in
 267 principle, when the user starts to prove a new theorem she has complete
 268 visibility of the library and she can refer to every definition and lemma,
 269 also using the mathematical notation already developed. In a similar way,
 270 every form of automation of the system must be able to analyze and possibly
 271 exploit every notion in the library.
 272
 273 The benefits of this approach highly justify the non neglectable price to pay
 274 in the development of several components. We analyse now a few of the causes
 275 of this additional complexity.
 276
 277 \subsubsection{Ambiguity}
 278 A rich mathematical library includes equivalent definitions and representations
 279 of the same notion. Moreover, mathematical notation inside a rich library is
 280 surely highly overloaded. As a consequence every mathematical expression the
 281 user provides is highly ambiguous since all the definitions,
 282 representations and special notations are available at once to the user.
 283
 284 The usual solution to the problem, as adopted for instance in Coq, is to
 285 restrict the user's scope to just one interpretation for each definition,
 286 representation or notation. In this way much of the ambiguity is removed,
 287 burdening the user that must someway declare what is in scope and that must
 288 use special syntax when she needs to refer to something not in scope.
 289
 290 Even with this approach ambiguity cannot be completely removed since implicit
 291 coercions can be arbitrarily inserted by the system to ``change the type''
 292 of subterms that do not have the expected type. Usually implicit coercions
 293 are used to overcome the absence of subtyping that should mimic the subset
 294 relation found in set theory. For instance, the expression
 295 $\forall n \in nat. 2 * n * \pi \equiv_\pi 0$ is correct in set theory since
 296 the set of natural numbers is a subset of that of real numbers; the
 297 corresponding expression $\forall n:nat. 2*n*\pi \equiv_\pi 0$ is not well typed
 298 and requires the automatic insertion of the coercion $real_of_nat: nat \to R$
 299 either around both 2 and $n$ (to make both products be on real numbers) or
 300 around the product $2*n$. The usual approach consists in either rejecting the
 301 ambiguous term or arbitrarily choosing one of the interpretations. For instance,
 302 Coq rejects the declaration of coercions that have alternatives
 303 (i.e. already declared coercions with the same domain and codomain)
 304 or that are obtained composing other coercions in order to
 305 avoid making several terms highly ambiguous by choosing to insert any one of the
 306 alternative coercions. Coq also arbitrarily chooses how to insert coercions in
 307 terms to make them well typed when there is more than one possibility (as in
 308 the previous example).
 309
 310 The approach we are following is radically different. It consists in dealing
 311 with ambiguous expressions instead of avoiding them. As a last resource,
 312 when the system is unable to disambiguate the input, the user is interactively
 313 required to provide more information that is recorded to avoid asking the
 314 same question again in subsequent processing of the same input.
 315 More details on our approach can be found in \ref{sec:disambiguation}.
 316
 317 \subsubsection{Consistency}
 318 A large mathematical library is likely to be logically inconsistent.
 319 It may contain incompatible axioms or alternative conjectures and it may
 320 even use definitions in incompatible ways. To clarify this last point,
 321 consider two identical definitions of a set of elements of a given type
 322 (or of a category of objects of a given type). Let us call the two definitions
 323 $A-Set$ and $B-Set$ (or $A-Category$ and $B-Category$).
 324 It is perfectly legitimate to either form the $A-Set$ of every $B-Set$
 325 or the $B-Set$ of every $A-Set$ (the same for categories). This just corresponds
 326 to assuming that a $B-Set$ (respectively an $A-Set$) is a small set, whereas
 327 an $A-Set$ (respectively a $B-Set$) is a big set (possibly of small sets).
 328 However, if one part of the library assumes $A-Set$s to be the small ones
 329 and another part of the library assumes $B-Set$s to be the small ones, the
 330 library as a whole will be logically inconsistent.
 331
 332 Logical inconsistency has never been a problem in the daily work of a
 333 mathematician. The mathematician simply imposes himself a discipline to
 334 restrict himself to consistent subsets of the mathematical knowledge.
 335 However, in doing so he does not choose the subset in advance by forgetting
 336 the rest of his knowledge. On the contrary he may proceed with a sort of
 337 top-down strategy: he may always inspect or use part of his knowledge, but
 338 when he actually does so he should check recursively that inconsistencies are
 339 not exploited.
 340
 341 Contrarily to the mathematical practice, the usual tendency in the world of
 342 assisted automation is that of building a logical environment (a consistent
 343 subset of the library) in a bottom up way, checking the consistency of a
 344 new axiom or theorem as soon as it is added to the environment. No lemma
 345 or definition outside the environment can be used until it is added to the
 346 library after every notion it depends on. Moreover, very often the logical
 347 environment is the only part of the library that can be inspected,
 348 that we can search lemmas in and that can be exploited by automatic tactics.
 349
 350 Moving one by one notions from the library to the environment is a costly
 351 operation since it involves re-checking the correctness of the notion.
 352 As a consequence mathematical notions are packages into theories that must
 353 be added to the environment as a whole. However, the consistency problem is
 354 only raised at the level of theories: theories must be imported in a bottom
 355 up way and the system must check that no inconsistency arises.
 356
 357 The practice of limiting the scope on the library to the logical environment
 358 is contrary to our commitment of being able to fully exploit as much as possible
 359 of the library at any given time. To reconcile consistency and visibility
 360 we have departed from the traditional implementation of an environment
 361 allowing environments to be built on demand in a top-down way. The new
 362 implementation is based on a modified meta-theory that changes the way
 363 convertibility, type checking, unification and refinement judgements.
 364 The modified meta-theory is fully described in \cite{libraryenvironments}.
 365 Here we just remark how a strong commitment on the way the user interacts
 366 with the library has lead to modifications to the logical core of the proof
 367 assistant. This is evidence that breakthroughs in the user interfaces
 368 and in the way the user interacts with the tools and with the library could
 369 be achieved only by means of strong architectural changes.
 370
 371 \subsubsection{Accessibility}
 372 A large library that is completely in scope needs effective indexing and
 373 searching methods to make the user productive. Libraries of formal results
 374 are particularly critical since they hold a large percentage of technical
 375 lemmas that do not have a significative name and that must be retrieved
 376 using advanced methods based on matching, unification, generalization and
 377 instantiation.
 378
 379 The efficiency of searching inside the library becomes a critical operation
 380 when automatic tactics exploit the library during the proof search. In this
 381 scenario the tactics must retrieve a set of candidates for backward or
 382 forward reasoning in a few milliseconds.
 383
 384 The choice of several proof assistants is to use ad-hoc data structures,
 385 such as context trees, to index all the terms currently in scope. These
 386 data structures are expecially designed to quickly retrieve terms up
 387 to matching, instantiation and generalization. All these data structures
 388 try to maximize sharing of identical subterms so that matching can be
 389 reduced to a visit of the tree (or dag) that holds all the maximally shared
 390 terms together.
 391
 392 Since the terms to be retrieved (or at least their initial prefix)
 393 are stored (actually ``melted'') in the data structure, these data structures
 394 must collect all the terms in a single location. In other words, adopting
 395 such data structures means centralizing the library.
 396
 397 In the \MOWGLI{} project we have tried to follow an alternative approach
 398 that consists in keeping the library fully distributed and indexing it
 399 by means of spiders that collect metadata and store them in a database.
 400 The challenge is to be able to collect only a smaller as possible number
 401 of metadata that provide enough information to approximate the matching
 402 operation. A matching operation is then performed in two steps. The first
 403 step is a query to the remote search engine that stores the metadata in
 404 order to detect a (hopefully small) complete set of candidates that could
 405 match. Completeness means that no term that matches should be excluded from
 406 the set of candiates. The second step consists in retrieving from the
 407 distributed library all the candidates and attempt the actual matching.
 408
 409 In the last we years we have progressively improved this technique.
 410 Our achievements can be found in \cite{query1,query2,query3}.
 411
 412 The technique and tools already developed have been integrated in \MATITA{},
 413 that is able to contact a remote \WHELP{} search engine \cite{whelp} or that
 414 can be directly linked to the code of the \WHELP. In either case the database
 415 used to store the metadata can be local or remote.
 416
 417 Our current challenge consists in the exploitation of \WHELP{} inside of
 418 \MATITA. In particular we are developing a set of tactics, for instance
 419 based on paramodulation \cite{paramodulation}, that perform queries to \WHELP{}
 420 to restrict the scope on the library to a set of interesting candidates,
 421 greatly reducing the search space. Moreover, queries to \WHELP{} are performed
 422 during parsing of user provided terms to disambiguate them.
 423
 424 In Sect.~\ref{sec:metadata} we describe the technique adopted in \MATITA.
 425
 426 \subsubsection{Library management}
 427
 428
 429 \subsection{ricerca e indicizzazione}
 430 \label{sec:metadata}
 431 \ASSIGNEDTO{andrea}
 432
 433 \subsection{auto}
 434 \ASSIGNEDTO{andrea}
 435
 436 \subsection{sostituzioni esplicite vs moduli}
 437 \ASSIGNEDTO{csc}
 438
 439 \subsection{xml / gestione della libreria}
 440 \ASSIGNEDTO{gares}
 441
 442
 443 \section{User Interface (da cambiare)}
 444
 445 \subsection{assenza di proof tree / resa in linguaggio naturale}
 446 \ASSIGNEDTO{andrea}
 447
 448 \subsection{Disambiguation}
 449 \label{sec:disambiguation}
 450 \ASSIGNEDTO{zack}
 451
 452  \begin{table}
 453   \caption{\label{tab:termsyn} Concrete syntax of CIC terms: built-in
 454   notation\strut}
 455  \hrule
 456  \[
 457  \begin{array}{@{}rcll@{}}
 458    \NT{term} & ::= & & \mbox{\bf terms} \\
 459      &     & x & \mbox{(identifier)} \\
 460      &  |  & n & \mbox{(number)} \\
 461      &  |  & s & \mbox{(symbol)} \\
 462      &  |  & \mathrm{URI} & \mbox{(URI)} \\
 463      &  |  & \verb+_+ & \mbox{(implicit)}\TODO{sync} \\
 464      &  |  & \verb+?+n~[\verb+[+~\{\NT{subst}\}~\verb+]+] & \mbox{(meta)} \\
 465      &  |  & \verb+let+~\NT{ptname}~\verb+\def+~\NT{term}~\verb+in+~\NT{term} \\
 466      &  |  & \verb+let+~\NT{kind}~\NT{defs}~\verb+in+~\NT{term} \\
 467      &  |  & \NT{binder}~\{\NT{ptnames}\}^{+}~\verb+.+~\NT{term} \\
 468      &  |  & \NT{term}~\NT{term} & \mbox{(application)} \\
 469      &  |  & \verb+Prop+ \mid \verb+Set+ \mid \verb+Type+ \mid \verb+CProp+ & \mbox{(sort)} \\
 470      &  |  & \verb+match+~\NT{term}~ & \mbox{(pattern matching)} \\
 471      &     & ~ ~ [\verb+[+~\verb+in+~x~\verb+]+]
 472               ~ [\verb+[+~\verb+return+~\NT{term}~\verb+]+] \\
 473      &     & ~ ~ \verb+with [+~[\NT{rule}~\{\verb+|+~\NT{rule}\}]~\verb+]+ & \\
 474      &  |  & \verb+(+~\NT{term}~\verb+:+~\NT{term}~\verb+)+ & \mbox{(cast)} \\
 475      &  |  & \verb+(+~\NT{term}~\verb+)+ \\
 476    \NT{defs}  & ::= & & \mbox{\bf mutual definitions} \\
 477      &     & \NT{fun}~\{\verb+and+~\NT{fun}\} \\
 478    \NT{fun} & ::= & & \mbox{\bf functions} \\
 479      &     & \NT{arg}~\{\NT{ptnames}\}^{+}~[\verb+on+~x]~\verb+\def+~\NT{term} \\
 480    \NT{binder} & ::= & & \mbox{\bf binders} \\
 481      &     & \verb+\forall+ \mid \verb+\lambda+ \\
 482    \NT{arg} & ::= & & \mbox{\bf single argument} \\
 483      &     & \verb+_+ \mid x \\
 484    \NT{ptname} & ::= & & \mbox{\bf possibly typed name} \\
 485      &     & \NT{arg} \\
 486      &  |  & \verb+(+~\NT{arg}~\verb+:+~\NT{term}~\verb+)+ \\
 487    \NT{ptnames} & ::= & & \mbox{\bf bound variables} \\
 488      &     & \NT{arg} \\
 489      &  |  & \verb+(+~\NT{arg}~\{\verb+,+~\NT{arg}\}~[\verb+:+~\NT{term}]~\verb+)+ \\
 490    \NT{kind} & ::= & & \mbox{\bf induction kind} \\
 491      &     & \verb+rec+ \mid \verb+corec+ \\
 492    \NT{rule} & ::= & & \mbox{\bf rules} \\
 493      &     & x~\{\NT{ptname}\}~\verb+\Rightarrow+~\NT{term}
 494  \end{array}
 495  \]
 496  \hrule
 497  \end{table}
 498
 499
 500 \subsubsection{Term input}
 501
 502 The primary form of user interaction employed by \MATITA{} is textual script
 503 editing: the user modifies it and evaluate step by step its composing
 504 \emph{statements}. Examples of statements are inductive type definitions,
 505 theorem declarations, LCF-style tacticals, and macros (e.g. \texttt{Check} can
 506 be used to ask the system to refine a given term and pretty print the result).
 507 Since many statements refer to terms of the underlying calculus, \MATITA{} needs
 508 a concrete syntax able to encode terms of the Calculus of Inductive
 509 Constructions.
 510
 511 Two of the requirements in the design of such a syntax are apparently in
 512 contrast:
 513 \begin{enumerate}
 514  \item the syntax should be as close as possible to common mathematical practice
 515   and implement widespread mathematical notations;
 516  \item each term described by the syntax should be non-ambiguous meaning that it
 517   should exists a function which associates to it a CIC term.
 518 \end{enumerate}
 519
 520 These two requirements are addressed in \MATITA{} by the mean of two mechanisms
 521 which work together: \emph{term disambiguation} and \emph{extensible notation}.
 522 Their interaction is visible in the architecture of the \MATITA{} input phase,
 523 depicted in Fig.~\ref{fig:inputphase}. The architecture is articulated as a
 524 pipline of three levels: the concrete syntax level (level 0) is the one the user
 525 has to deal with when inserting CIC terms; the abstract syntax level (level 2)
 526 is an internal representation which intuitively encodes mathematical formulae at
 527 the content level~\cite{adams}\cite{mkm-structure}; the last level is that of
 528 CIC terms.
 529
 530 \begin{figure}[ht]
 531  \begin{center}
 532   \includegraphics[width=0.9\textwidth]{input_phase}
 533   \caption{\MATITA{} input phase}
 534  \end{center}
 535  \label{fig:inputphase}
 536 \end{figure}
 537
 538 Requirement (1) is addressed by a built-in concrete syntax for terms, described
 539 in Tab.~\ref{tab:termsyn}, and the extensible notation mechanisms which offers a
 540 way for extending available mathematical notations. Extensible notation, which
 541 is also in charge of providing a parsing function mapping concrete syntax terms
 542 to content level terms, is described in Sect.~\ref{sec:notation}.  Requirement
 543 (2) is addressed by the conjunct action of that parsing function and
 544 disambiguation which provides a function from content level terms to CIC terms.
 545
 546 \subsubsection{Sources of ambiguity}
 547
 548 The translation from content level terms to CIC terms is not straightforward
 549 because some nodes of the content encoding admit more that one CIC encoding,
 550 invalidating requirement (2).
 551
 552 \begin{example}
 553  \label{ex:disambiguation}
 554
 555  Consider the term at the concrete syntax level \texttt{\TEXMACRO{forall} x. x +
 556  ln 1 = x} of Fig.~\ref{fig:inputphase}(a), it can be the type of a lemma the
 557  user may want to prove. Assuming that both \texttt{+} and \texttt{=} are parsed
 558  as infix operators, all the following questions are legitimate and must be
 559  answered before obtaining a CIC term from its content level encoding
 560  (Fig.~\ref{fig:inputphase}(b)):
 561
 562  \begin{enumerate}
 563
 564   \item Since \texttt{ln} is an unbound identifier, which CIC constants does it
 565    represent? Many different theorems in the library may share its (rather
 566    short) name \dots
 567
 568   \item Which kind of number (\IN, \IR, \dots) the \texttt{1} literal stand for?
 569    Which encoding is used in CIC to represent it? E.g., assuming $1\in\IN$, is
 570    it an unary or a binary encoding?
 571
 572   \item Which kind of equality the ``='' node represents? Is it Leibniz's
 573    polymorhpic equality? Is it a decidable equality over \IN, \IR, \dots?
 574
 575  \end{enumerate}
 576
 577 \end{example}
 578
 579 In \MATITA, three \emph{sources of ambiguity} are admitted for content level
 580 terms: unbound identifiers, literal numbers, and operators. Each instance of
 581 ambiguity sources (ambiguous entity) occuring in a content level term is
 582 associated to a \emph{disambiguation domain}. Intuitively a disambiguation
 583 domain is a set of CIC terms which may be replaced for an ambiguous entity
 584 during disambiguation. Each item of the domain is said to be an
 585 \emph{interpretation} for the ambiguous entity.
 586
 587 \emph{Unbound identifiers} (question 1) are ambiguous entities since the
 588 namespace of CIC objects is not flat and the same identifier may denote many
 589 ofthem. For example the short name \texttt{plus\_assoc} in the \HELM{} library
 590 is shared by three different theorems stating the associative property of
 591 different additions.  This kind of ambiguity is avoidable if the user is willing
 592 to use long names (in form of URIs in the \texttt{cic://} scheme) in the
 593 concrete syntax, with the obvious drawbacks of obtaining long and unreadable
 594 terms.
 595
 596 Given an unbound identifier, the corresponding disambiguation domain is computed
 597 querying the library for all constants, inductive types, and inductive type
 598 constructors having it as their short name (see the \LOCATE{} query in
 599 Sect.~\ref{sec:metadata}).
 600
 601 \emph{Literal numbers} (question 2) are ambiguous entities as well since
 602 different kinds of numbers can be encoded in CIC (\IN, \IR, \IZ, \dots) using
 603 different encodings. Considering the restricted example of natural numbers we
 604 can for instance encode them in CIC using inductive datatypes with a number of
 605 constructor equal to the encoding base plus 1, obtaining one encoding for each
 606 base.
 607
 608 For each possible way of mapping a literal number to a CIC term, \MATITA{} is
 609 aware of a \emph{number intepretation function} which, when applied to the
 610 natural number denoted by the literal\footnote{at the moment only literal
 611 natural number are supported in the concrete syntax} returns a corresponding CIC
 612 term. The disambiguation domain for a given literal number is built applying to
 613 the literal all available number interpretation functions in turn.
 614
 615 Number interpretation functions can be defined in OCaml or directly using
 616 \TODO{notazione per i numeri}.
 617
 618 \emph{Operators} (question 3) are intuitively head of applications, as such they
 619 are always applied to a (possiblt empty) sequence of arguments. Their ambiguity
 620 is a need since it is often the case that some notation is used in an overloaded
 621 fashion to hide the use of different CIC constants which encodes similar
 622 concepts. For example, in the standard library of \MATITA{} the infix \texttt{+}
 623 notation is available building a binary \texttt{Op(+)} node, whose
 624 disambiguation domain may refer to different constants like the addition over
 625 natural numbers \URI{cic:/matita/nat/plus/plus.con} or that over real numbers of
 626 the \COQ{} standard library \URI{cic:/Coq/Reals/Rdefinitions/Rplus.con}.
 627
 628 For each possible way of mapping an operator application to a CIC term,
 629 \MATITA{} knows an \emph{operator interpretation function} which, when applied
 630 to an operator and its arguments, returns a CIC term. The disambiguation domain
 631 for a given operator is built applying to the operator and its arguments all
 632 available operator interpretation functions in turn.
 633
 634 Operator interpretation functions could be added using the
 635 \texttt{interpretation} statement. For example, among the first line of the
 636 script \FILE{matita/library/logic/equality.ma} from the \MATITA{} standard
 637 library we read:
 638
 639 \begin{grafite}
 640 interpretation "leibnitz's equality"
 641  'eq x y =
 642    (cic:/matita/logic/equality/eq.ind#xpointer(1/1) _ x y).
 643 \end{grafite}
 644
 645 Evaluating it in \MATITA{} will add an operator interpretation function for the
 646 binary operator \texttt{eq} which expands to the CIC term on the right hand side
 647 of the statement. That CIC term can be written using only built-in concrete
 648 syntax, can contain no ambiguity source; still, it can refer to operator
 649 arguments bound on the left hand side and can contain implicit terms (denoted
 650 with \texttt{\_}) which will be expanded to fresh metavariables. The latter
 651 feature is used in the example above for the first argument of Leibniz's
 652 polymorhpic equality.
 653
 654 \subsubsection{Disambiguation algorithm}
 655
 656 A \emph{disambiguation algorithm} takes as input a content level term and return
 657 a fully determined CIC term. The key observation on which a disambiguation
 658 algorithm is based is that given a content level term with more than one sources
 659 of ambiguity, not all possible combination of interpretation lead to a typable
 660 CIC term. In the term of Ex.~\ref{ex:disambiguation} for instance the
 661 interpretation of \texttt{ln} as a function from \IR to \IR and the
 662 interpretation of \texttt{1} as the Peano number $1$ can't coexists. The notion
 663 of ``can't coexists'' in the disambiguation of \MATITA{} is defined on top of
 664 the \emph{refiner} for CIC terms described in~\cite{csc-phd}.
 665
 666 Briefly, a refiner is a function whose input is an \emph{incomplete CIC term}
 667 $t_1$ --- i.e. a term where metavariables occur (Sect.~\ref{sec:metavariables}
 668 --- and whose output is either:\NOTE{descrizione sommaria del refiner, pu\'o
 669 essere spostata altrove}
 670
 671 \begin{enumerate}
 672
 673  \item an incomplete CIC term $t_2$ where $t_2$ is a well-typed term obtained
 674   assigning a type to each metavariable in $t_1$ (in case of dependent types,
 675   instantiation of some of the metavariable occurring in $t_1$ may occur as
 676   well);
 677
 678  \item $\epsilon$, meaning that no well-typed term could be obtained via
 679   assignment of type to metavariable in $t_1$ and their instantiation;
 680
 681  \item $\bot$, meaning that the refiner is unable to decide whether of the two
 682   cases above apply (refinement is semi-decidable).
 683
 684 \end{enumerate}
 685
 686 On top of a CIC refiner \MATITA{} implement an efficient disambiguation
 687 algorithm, which is outlined below. It takes as input a content level term $c$
 688 and proceeds as follows:
 689
 690 \begin{enumerate}
 691
 692  \item Create disambiguation domains $\{D_i | i\in\mathit{Dom}(c)\}$, where
 693   $\mathit{Dom}(c)$ is the set of ambiguity sources of $c$. Each $D_i$ is a set
 694   of CIC terms and can be built as described above.
 695
 696  \item An \emph{interpretation} $\Phi$ for $c$ is a map associating an
 697   incomplete CIC term to each ambiguity source of $c$. Given $c$ and one of its
 698   interpretations an incomplete CIC term is fully determined replacing each
 699   ambiguity source of $c$ with its mapping in the interpretation and injecting
 700   the remaining structure of the content level in the CIC level (e.g. replacing
 701   the application of the content level with the application of the CIC level).
 702   This operation is informally called ``interpreting $c$ with $\Phi$''.
 703
 704   Create an initial interpretation $\Phi_0 = \{\phi_i | \phi_i = \_,
 705   i\in\mathit{Dom}(c)\}$, which associates a fresh metavariable to each source
 706   of ambiguity of $c$. During this step, implicit terms are expanded to fresh
 707   metavariables as well.
 708
 709  \item Refine the current incomplete CIC term (i.e.  the term obtained
 710   interpreting $t$ with $\Phi_i$).
 711
 712   If the refinement succeeds or is undetermined the next interpretation
 713   $\Phi_{i+1}$ will be created \emph{making a choice}, that is replacing in the
 714   current interpretation one of the metavariable appearing in $\Phi_i$ with one
 715   of the possible choice from the corresponding disambiguation domain. The
 716   metavariable to be replaced is chosen following a preorder visit of the
 717   ambiguous term. Then, step 3 is attempted again with the new interpretation.
 718
 719   If the refinement fails the current set of choices cannot lead to a well-typed
 720   term and backtracking of the current interpretation is attempted.
 721
 722  \item Once an unambiguous correct interpretation is found (i.e. $\Phi_i$ does
 723   no longer contain any placeholder), backtracking is attempted anyway to find
 724   the other correct interpretations.
 725
 726  \item Let $n$ be the number of interpretations who survived step 4. If $n=0$
 727   signal a type error. If $n=1$ we have found exactly one (incomplete) CIC term
 728   corresponding to the content level term $c$, returns it as output of the
 729   disambiguation phase. If $n>1$ we have found many different (incomplete) CIC
 730   terms which can correspond to the content level term, let the user choose one
 731   of the $n$ interpretations and returns the corresponding term.
 732
 733 \end{enumerate}
 734
 735 The efficiency of this algorithm resides in the fact that as soon as an
 736 incomplete CIC term is not typable, no further instantiation of the
 737 metavariables of the corresponding interpretation is attemped.
 738 % For example, during the disambiguation of the user input
 739 % \texttt{\TEXMACRO{forall} x. x*0 = 0}, an interpretation $\Phi_i$ is
 740 % encountered which associates $?$ to the instance of \texttt{0} on the right,
 741 % the real number $0$ to the instance of \texttt{0} on the left, and the
 742 % multiplication over natural numbers (\texttt{mult} for short) to \texttt{*}.
 743 % The refiner will fail, since \texttt{mult} require a natural argument, and no
 744 % further instantiation of the placeholder will be tried.
 745
 746 Details of the disambiguation algorithm along with an analysis of its complexity
 747 can be found in~\cite{disambiguation}, where a formulation without backtracking
 748 (corresponding to the actual \MATITA{} implementation) is also presented.
 749
 750 \subsubsection{Disambiguation stages}
 751
 752 \subsection{notation}
 753 \label{sec:notation}
 754 \ASSIGNEDTO{zack}
 755
 756 Mathematical notation plays a fundamental role in mathematical practice: it
 757 helps expressing in a concise symbolic fashion concepts of arbitrary complexity.
 758 Its use in proof assistants like \MATITA{} is no exception. Formal mathematics
 759 indeed often impose to encode mathematical concepts at a very high level of
 760 details (e.g. Peano numbers, implicit arguments) having a restricted toolbox of
 761 syntactic constructions in the calculus.
 762
 763 Consider for example one of the point reached while proving the distributivity
 764 of times over minus on natural numbers included in the \MATITA{} standards
 765 library. (Part of) the reached sequent can be seen in \MATITA{} both using the
 766 notation for various arithmetical and relational operator or without using it.
 767 The sequent rendered without using notation would be as follows:
 768 \sequent{
 769 \mathtt{H}: \mathtt{le} z y\\
 770 \mathtt{Hcut}: \mathtt{eq} \mathtt{nat} (\mathtt{plus} (\mathtt{times} x (\mathtt{minus}
 771 y z)) (\mathtt{times} x z))\\
 772 (\mathtt{plus} (\mathtt{minus} (\mathtt{times} x y) (\mathtt{times} x z))
 773 (\mathtt{times} x z))}{
 774 \mathtt{eq} \mathtt{nat} (\mathtt{times} x (\mathtt{minus} y z)) (\mathtt{minus}
 775 (\mathtt{times} x y) (\mathtt{times} x z))}
 776 while the corresponding sequent rendered with notation enabled would be:
 777 \sequent{
 778 H: z\leq y\\
 779 Hcut: x*(y-z)+x*z=x*y-x*z+x*z}{
 780 x*(y-z)=x*y-x*z}
 781
 782
 783 \subsection{mathml}
 784 \ASSIGNEDTO{zack}
 785
 786 \subsection{selezione semantica, cut paste, hyperlink}
 787 \ASSIGNEDTO{zack}
 788
 789 \subsection{pattern}
 790 \ASSIGNEDTO{gares}\\
 791 Patterns are the textual counterpart of the MathML widget graphical
 792 selection.
 793
 794 Matita benefits of a graphical interface and a powerful MathML rendering
 795 widget that allows the user to select pieces of the sequent he is working
 796 on. While this is an extremely intuitive way for the user to
 797 restrict the application of tactics, for example, to some subterms of the
 798 conclusion or some hypothesis, the way this action is recorded to the text
 799 script is not obvious.\\
 800 In \MATITA{} this issue is addressed by patterns.
 801
 802 \subsubsection{Pattern syntax}
 803 A pattern is composed of two terms: a $\NT{sequent\_path}$ and a
 804 $\NT{wanted}$.
 805 The former mocks-up a sequent, discharging unwanted subterms with $?$ and
 806 selecting the interesting parts with the placeholder $\%$.
 807 The latter is a term that lives in the context of the placeholders.
 808
 809 The concrete syntax is reported in table \ref{tab:pathsyn}
 810 \NOTE{uso nomi diversi dalla grammatica ma che hanno + senso}
 811 \begin{table}
 812  \caption{\label{tab:pathsyn} Concrete syntax of \MATITA{} patterns.\strut}
 813 \hrule
 814 \[
 815 \begin{array}{@{}rcll@{}}
 816   \NT{pattern} &
 817     ::= & [~\verb+in match+~\NT{wanted}~]~[~\verb+in+~\NT{sequent\_path}~] & \\
 818   \NT{sequent\_path} &
 819     ::= & \{~\NT{ident}~[~\verb+:+~\NT{multipath}~]~\}~
 820       [~\verb+\vdash+~\NT{multipath}~] & \\
 821   \NT{wanted} & ::= & \NT{term} & \\
 822   \NT{multipath} & ::= & \NT{term\_with\_placeholders} & \\
 823 \end{array}
 824 \]
 825 \hrule
 826 \end{table}
 827
 828 \subsubsection{How patterns work}
 829 Patterns mimic the user's selection in two steps. The first one
 830 selects roots (subterms) of the sequent, using the
 831 $\NT{sequent\_path}$,  while the second
 832 one searches the $\NT{wanted}$ term starting from these roots. Both are
 833 optional steps, and by convention the empty pattern selects the whole
 834 conclusion.
 835
 836 \begin{description}
 837 \item[Phase 1]
 838   concerns only the $[~\verb+in+~\NT{sequent\_path}~]$
 839   part of the syntax. $\NT{ident}$ is an hypothesis name and
 840   selects the assumption where the following optional $\NT{multipath}$
 841   will operate. \verb+\vdash+ can be considered the name for the goal.
 842   If the whole pattern is omitted, the whole goal will be selected.
 843   If one or more hypotheses names are given the selection is restricted to
 844   these assumptions. If a $\NT{multipath}$ is omitted the whole
 845   assumption is selected. Remember that the user can be mostly
 846   unaware of this syntax, since the system is able to write down a
 847   $\NT{sequent\_path}$ starting from a visual selection.
 848   \NOTE{Questo ancora non va in matita}
 849
 850   A $\NT{multipath}$ is a CiC term in which a special constant $\%$
 851   is allowed.
 852   The roots of discharged subterms are marked with $?$, while $\%$
 853   is used to select roots. The default $\NT{multipath}$, the one that
 854   selects the whole term, is simply $\%$.
 855   Valid $\NT{multipath}$ are, for example, $(?~\%~?)$ or $\%~\verb+\to+~(\%~?)$
 856   that respectively select the first argument of an application or
 857   the source of an arrow and the head of the application that is
 858   found in the arrow target.
 859
 860   The first phase selects not only terms (roots of subterms) but also
 861   their context that will be eventually used in the second phase.
 862
 863 \item[Phase 2]
 864   plays a role only if the $[~\verb+in match+~\NT{wanted}~]$
 865   part is specified. From the first phase we have some terms, that we
 866   will see as subterm roots, and their context. For each of these
 867   contexts the $\NT{wanted}$ term is disambiguated in it and the
 868   corresponding root is searched for a subterm $\alpha$-equivalent to
 869   $\NT{wanted}$. The result of this search is the selection the
 870   pattern represents.
 871
 872 \end{description}
 873
 874 \noindent
 875 Since the first step is equipotent to the composition of the two
 876 steps, the system uses it to represent each visual selection.
 877 The second step is only meant for the
 878 experienced user that writes patterns by hand, since it really
 879 helps in writing concise patterns as we will see in the
 880 following examples.
 881
 882 \subsubsection{Examples}
 883 To explain how the first step works let's give an example. Consider
 884 you want to prove the uniqueness of the identity element $0$ for natural
 885 sum, and that you can relay on the previously demonstrated left
 886 injectivity of the sum, that is $inj\_plus\_l:\forall x,y,z.x+y=z+y \to x =z$.
 887 Typing
 888 \begin{grafite}
 889 theorem valid_name: \forall n,m. m + n = n \to m = O.
 890   intros (n m H).
 891 \end{grafite}
 892 \noindent
 893 leads you to the following sequent
 894 \sequent{
 895 n:nat\\
 896 m:nat\\
 897 H: m + n = n}{
 898 m=O
 899 }
 900 \noindent
 901 where you want to change the right part of the equivalence of the $H$
 902 hypothesis with $O + n$ and then use $inj\_plus\_l$ to prove $m=O$.
 903 \begin{grafite}
 904   change in H:(? ? ? %) with (O + n).
 905 \end{grafite}
 906 \noindent
 907 This pattern, that is a simple instance of the $\NT{sequent\_path}$
 908 grammar entry, acts on $H$ that has type (without notation) $(eq~nat~(m+n)~n)$
 909 and discharges the head of the application and the first two arguments with a
 910 $?$ and selects the last argument with $\%$. The syntax may seem uncomfortable,
 911 but the user can simply select with the mouse the right part of the equivalence
 912 and left to the system the burden of writing down in the script file the
 913 corresponding pattern with $?$ and $\%$ in the right place (that is not
 914 trivial, expecially where implicit arguments are hidden by the notation, like
 915 the type $nat$ in this example).
 916
 917 Changing all the occurrences of $n$ in the hypothesis $H$ with $O+n$
 918 works too and can be done, by the experienced user, writing directly
 919 a simpler pattern that uses the second phase.
 920 \begin{grafite}
 921   change in match n in H with (O + n).
 922 \end{grafite}
 923 \noindent
 924 In this case the $\NT{sequent\_path}$ selects the whole $H$, while
 925 the second phase searches the wanted $n$ inside it by
 926 $\alpha$-equivalence. The resulting
 927 equivalence will be $m+(O+n)=O+n$ since the second phase found two
 928 occurrences of $n$ in $H$ and the tactic changed both.
 929
 930 Just for completeness the second pattern is equivalent to the
 931 following one, that is less readable but uses only the first phase.
 932 \begin{grafite}
 933   change in H:(? ? (? ? %) %) with (O + n).
 934 \end{grafite}
 935 \noindent
 936
 937 \subsubsection{Tactics supporting patterns}
 938 In \MATITA{} all the tactics that can be restricted to subterm of the working
 939 sequent accept the pattern syntax. In particular these tactics are: simplify,
 940 change, fold, unfold, generalize, replace and rewrite.
 941
 942 \NOTE{attualmente rewrite e fold non supportano phase 2. per
 943 supportarlo bisogna far loro trasformare il pattern phase1+phase2
 944 in un pattern phase1only come faccio nell'ultimo esempio. lo si fa
 945 con una pattern\_of(select(pattern))}
 946
 947 \subsubsection{Comparison with Coq}
 948 Coq has a two diffrent ways of restricting the application of tactis to
 949 subterms of the sequent, both relaying on the same special syntax to identify
 950 a term occurrence.
 951
 952 The first way is to use this special syntax to specify directly to the
 953 tactic the occurrnces of a wanted term that should be affected, while
 954 the second is to prepare the sequent with another tactic called
 955 pattern and the apply the real tactic. Note that the choice is not
 956 left to the user, since some tactics needs the sequent to be prepared
 957 with pattern and do not accept directly this special syntax.
 958
 959 The base idea is that to identify a subterm of the sequent we can
 960 write it and say that we want, for example, the third and the fifth
 961 occurce of it (counting from left to right). In our previous example,
 962 to change only the left part of the equivalence, the correct command
 963 is
 964 \begin{grafite}
 965   change n at 2 in H with (O + n)
 966 \end{grafite}
 967 \noindent
 968 meaning that in the hypothesis $H$ the $n$ we want to change is the
 969 second we encounter proceeding from left toright.
 970
 971 The tactic pattern computes a
 972 $\beta$-expansion of a part of the sequent with respect to some
 973 occurrences of the given term. In the previous example the following
 974 command
 975 \begin{grafite}
 976   pattern n at 2 in H
 977 \end{grafite}
 978 \noindent
 979 would have resulted in this sequent
 980 \begin{grafite}
 981   n : nat
 982   m : nat
 983   H : (fun n0 : nat => m + n = n0) n
 984   ============================
 985    m = 0
 986 \end{grafite}
 987 \noindent
 988 where $H$ is $\beta$-expanded over the second $n$
 989 occurrence. This is a trick to make the unification algorithm ignore
 990 the head of the application (since the unification is essentially
 991 first-order) but normally operate on the arguments.
 992 This works for some tactics, like rewrite and replace,
 993 but for example not for change and other tactics that do not relay on
 994 unification.
 995
 996 The idea behind this way of identifying subterms in not really far
 997 from the idea behind patterns, but really fails in extending to
 998 complex notation, since it relays on a mono-dimensional sequent representation.
 999 Real math notation places arguments upside-down (like in indexed sums or
1000 integrations) or even puts them inside a bidimensional matrix.
1001 In these cases using the mouse to select the wanted term is probably the
1002 only way to tell the system exactly what you want to do.
1003
1004 One of the goals of \MATITA{} is to use modern publishing techiques, and
1005 adopting a method for restricting tactics application domain that discourages
1006 using heavy math notation, would definitively be a bad choice.
1007
1008 \subsection{Tacticals}
1009 \ASSIGNEDTO{gares}\\
1010 There are mainly two kinds of languages used by proof assistants to recorder
1011 proofs: tactic based and declarative. We will not investigate the philosophy
1012 aroud the choice that many proof assistant made, \MATITA{} included, and we will not compare the two diffrent approaches. We will describe the common issues of the first one and how \MATITA{} tries to solve them.
1013
1014 First we must highlight the fact that proof scripts made using tactis are
1015 particularly unreadable. This is not a big deal for the user while he is
1016 constructing the proof, but is considerably a problem when he tries to reread
1017 what he did or when he shows his work to someone else.
1018
1019 Another common issue for tactic based proof scripts is their mantenibility.
1020 Huge libraries have been developed, and backward compatibility is a really time
1021 consuming task. This problem is usually ameliorated with tacticals, that
1022 help in structuring proofs and consequently their maintenance, but have a bad
1023 counterpart in script readability.  Since tacticals are executed atomically,
1024 the common practice of executing again a script to review all the proof steps
1025 doesn't work at all. This issue in addition to the really poor feeling that a
1026 list of tactics gives about the proof makes script rereading particularly hard.
1027
1028 \MATITA{} uses a language of tactics and tacticals, but adopts a peculiar
1029 strategy to make this technique more user friendly without loosing in
1030 mantenibility or expressivity.
1031
1032 \subsubsection{Tacticals overview}
1033 Before describing the peculiarities of \MATITA{} tacticals we briefly introduce what tacticals are and where they can be useful.
1034
1035 Tacticals first appered in LCF(cita qualcosa) and can be seen as programming
1036 constructs, like looping, branching, error recovery or sequential composition.
1037 For example $tac_1~.~tac_2$ executes the first tactic and applies the second
1038 only to the first goal opened by $tac_1$. Baranching can be used to specify a
1039 diffrent tactic to apply to each new goal opened by another tactic, for example
1040 $tac_1\verb+;[+~tac_{1.1}~\verb+|+~tac_{1.2}~\verb+|+~\cdots~|~tac_{1.n}~\verb+]+$
1041 applies respectively $tac_{1.i}$ to the $i$-th goal opened by $tac_1$. Looping
1042 can be used to iterate a tactic until it works: $\verb+repeat+~tac$ applies
1043 $tac$ to the current goal, and again $tac$ to the eventually resulting goals
1044 until all goal are closed or the tactic fails.
1045
1046 \begin{table}
1047  \caption{\label{tab:tacsyn} Concrete syntax of \MATITA{} tacticals.\strut}
1048 \hrule
1049 \[
1050 \begin{array}{@{}rcll@{}}
1051   \NT{punctuation} &
1052     ::= & \SEMICOLON \quad|\quad \DOT \quad|\quad \SHIFT \quad|\quad \BRANCH \quad|\quad \MERGE \quad|\quad \POS{\mathrm{NUMBER}~} & \\
1053   \NT{block\_kind} &
1054     ::= & \verb+focus+ ~|~ \verb+try+ ~|~ \verb+solve+ ~|~ \verb+first+ ~|~ \verb+repeat+ ~|~ \verb+do+~\mathrm{NUMBER} & \\
1055   \NT{block\_delimiter} &
1056     ::= & \verb+begin+ ~|~ \verb+end+ & \\
1057   \NT{tactical} &
1058     ::= & \verb+skip+ ~|~ \NT{tactic} ~|~ \NT{block\_delimiter} ~|~ \NT{block\_kind} ~|~ \NT{punctuation} ~|~& \\
1059 \end{array}
1060 \]
1061 \hrule
1062 \end{table}
1063
1064 \MATITA{} tacticals syntax is reported in table \ref{tab:tacsyn}.
1065 While one whould expect to find structured constructs like
1066 $\verb+do+~n~\NT{tactic}$ the syntax allows pieces of tacticals to be written.
1067 This is essential for base idea behind matita tacticals: step-by-step execution.
1068
1069 \subsubsection{\MATITA{} Tinycals}
1070 The low-level tacticals implementation of \MATITA{} allows a step-by-step execution of a tactical, that substantially means that a $\NT{block\_kind}$ is not executed as an atomic operation. This has two major benefits for the user, even being a so simple idea:
1071 \begin{description}
1072 \item[Proof structuring]
1073   is much easyer. Consider for example a proof by induction, and imagine you are using classical tacticals. After applying the
1074   induction principle, with one step tacticals, you have to choose: structure
1075   the proof or not. If you decide for the former you have to branch with
1076   \verb+[+ and write tactics for all the cases separated by \verb+|+ and the
1077   close the tactical with
1078   \verb+]+. You can replace most of the cases by the identity tactic just to
1079   concentrate only on the first goal, but you will have to go one step back and
1080   one further every time you add something inside the tactical. And if you are
1081   boared of doing so, you will finish in giving up structuring the proof and
1082   write a plain list of tactics.\\
1083   With step-by-step tacticals you can apply the induction principle, and just
1084   open the branching tactical \verb+[+. Then you can interact with the system
1085   reaching a proof of the first case, without having to specify the whole
1086   branching tactical. When you have proved all the induction cases, you close
1087   the branching tactical with \verb+]+ and you are done with a structured proof.
1088 \item[Rereading]
1089   is possible. Going on step by step shows exactly what is going on.
1090   Consider again a proof by induction, that starts applying the induction
1091   principle and suddenly baranches with a \verb+[+. This clearly separates all
1092   the induction cases, but if the square brackets content is executed in one
1093   single step you completely loose the possibility of rereading it. Again,
1094   executing step-by-step is the way you whould like to review the
1095   demonstration. Remember tha understandig the proof from the script is not
1096   easy, and only the execution of tactics (and the resulting transformed goal)
1097   gives you the feeling of what is goning on.
1098 \end{description}
1099
1100
1101
1102 \subsection{named variable e disambiguazione lazy}
1103 \ASSIGNEDTO{csc}
1104
1105 \subsection{metavariabili}
1106 \label{sec:metavariables}
1107 \ASSIGNEDTO{csc}
1108
1109 \begin{verbatim}
1110
1111 \end{verbatim}
1112
1113 \section{Drawbacks, missing, \dots}
1114
1115 \subsection{moduli}
1116 \ASSIGNEDTO{}
1117
1118 \subsection{ltac}
1119 \ASSIGNEDTO{}
1120
1121 \subsection{estrazione}
1122 \ASSIGNEDTO{}
1123
1124 \subsection{localizzazione errori}
1125 \ASSIGNEDTO{}
1126
1127 \acknowledgements
1128 We would like to thank all the students that during the past
1129 five years collaborated in the \HELM{} project and contributed to
1130 the development of Matita, and in particular
1131 A.Griggio, F.Guidi, P. Di Lena, L.Padovani, I.Schena, M.Selmi,
1132 V.Tamburrelli.
1133
1134 \theendnotes
1135
1136 \bibliography{matita}
1137
1138
1139 \end{document}
1140