X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Ftex_cic_textual_parser%2FtexCicTextualLexer.mll;fp=helm%2Focaml%2Ftex_cic_textual_parser%2FtexCicTextualLexer.mll;h=01ddd0cf302acdede1f9b14f086864077677ce10;hb=bac72fcaa876137ab7a5630e0c1badc2a627dce8;hp=0000000000000000000000000000000000000000;hpb=2dc0733271cd251aaa3edaece8a883fe691775ab;p=helm.git diff --git a/helm/ocaml/tex_cic_textual_parser/texCicTextualLexer.mll b/helm/ocaml/tex_cic_textual_parser/texCicTextualLexer.mll new file mode 100644 index 000000000..01ddd0cf3 --- /dev/null +++ b/helm/ocaml/tex_cic_textual_parser/texCicTextualLexer.mll @@ -0,0 +1,122 @@ +(* Copyright (C) 2000, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +{ + open TexCicTextualParser;; + module L = Lexing;; + module U = UriManager;; + + let indtyuri_of_uri uri = + let index_sharp = String.index uri '#' in + let index_num = index_sharp + 3 in + try + (UriManager.uri_of_string (String.sub uri 0 index_sharp), + int_of_string(String.sub uri index_num (String.length uri - index_num)) - 1 + ) + with + Failure msg -> + raise (CicTextualParser0.LexerFailure "Not an inductive URI") + ;; + + let indconuri_of_uri uri = + let index_sharp = String.index uri '#' in + let index_div = String.rindex uri '/' in + let index_con = index_div + 1 in + try + (UriManager.uri_of_string (String.sub uri 0 index_sharp), + int_of_string + (String.sub uri (index_sharp + 3) (index_div - index_sharp - 3)) - 1, + int_of_string + (String.sub uri index_con (String.length uri - index_con)) + ) + with + Failure msg -> + raise (CicTextualParser0.LexerFailure "Not a constructor URI") + ;; + + (* TeX unquoting for "_" *) + let unquote str = + Str.global_replace (Str.regexp "\\\\_") "_" str + ;; +} +let dollar = '$' +let num = ['1'-'9']['0'-'9']* | '0' +let letter = ['A'-'Z' 'a'-'z'] +let alfa = letter | ['_' ''' '-'] | "\\_" +let ident = letter (alfa | num)* +let baseuri = '/'(ident '/')* ident '.' +let conuri = baseuri "con" +let varuri = baseuri "var" +let indtyuri = baseuri "ind#1/" num +let indconuri = baseuri "ind#1/" num "/" num +let blanks = [' ' '\t' '\n' '~' '{' '}'] | "\\;" | "\\rm" +rule token = + parse + blanks { token lexbuf } (* skip blanks *) + | "\\Case" { CASE } + | "\\Fix" { FIX } + | "\\CoFix" { COFIX } + | "\\Set" { SET } + | "\\Prop" { PROP } + | "\\Type" { TYPE } + | ident { ID (unquote (L.lexeme lexbuf)) } + | conuri { CONURI + (U.uri_of_string ("cic:" ^ (unquote (L.lexeme lexbuf)))) } + | varuri { VARURI + (U.uri_of_string ("cic:" ^ (unquote (L.lexeme lexbuf)))) } + | indtyuri { INDTYURI + (indtyuri_of_uri ("cic:" ^ (unquote (L.lexeme lexbuf)))) } + | indconuri { INDCONURI + (indconuri_of_uri("cic:" ^ (unquote (L.lexeme lexbuf)))) } + | num { NUM (int_of_string (L.lexeme lexbuf)) } + | '?' num { let lexeme = L.lexeme lexbuf in + META + (int_of_string + (String.sub lexeme 1 (String.length lexeme - 1))) } + | ":>" { CAST } + | ":=" { LETIN } + | '?' { IMPLICIT } + | '(' { LPAREN } + | ')' { RPAREN } + | "\\[" { LBRACKET } + | "\\]" { RBRACKET } + | "\\{" { LCURLY } + | "\\}" { RCURLY } + | ';' { SEMICOLON } + | "\\lambda" { LAMBDA } + | "\\pi" { PROD } + | "\\forall" { PROD } + | ':' { COLON } + | '.' { DOT } + | "\\to" { ARROW } + | '_' { NONE } + | dollar { DOLLAR } + | eof { EOF } + (* Arithmetical operators *) + | '+' { PLUS } + | '-' { MINUS } + | '*' { TIMES } + | '=' { EQ } +{}