X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Fcic_disambiguation%2Flexer.ml;h=5363d2180e72426f31bd329eeaf3f406da343db0;hb=6f7dbdfa37be6a1135df8169557eab5c92c485e2;hp=79003792bdb473304e5dd8536583047029475971;hpb=0148419c577eab74538b8e2564a64e399d8bdd65;p=helm.git diff --git a/helm/ocaml/cic_disambiguation/lexer.ml b/helm/ocaml/cic_disambiguation/lexer.ml index 79003792b..5363d2180 100644 --- a/helm/ocaml/cic_disambiguation/lexer.ml +++ b/helm/ocaml/cic_disambiguation/lexer.ml @@ -1,18 +1,48 @@ +(* Copyright (C) 2004, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) exception Error of int * int * string +exception Not_an_extended_ident + let regexp alpha = [ 'a' - 'z' 'A' - 'Z' ] let regexp digit = [ '0' - '9' ] let regexp blank = [ ' ' '\t' '\n' ] let regexp blanks = blank+ let regexp num = digit+ -let regexp ident = alpha (alpha | num)* -let regexp symbol = [^ 'a' - 'z' 'A' - 'Z' '0' - '9' ' ' '\t' '\n' ] let regexp tex_token = '\\' alpha+ +let regexp symbol = [^ 'a' - 'z' 'A' - 'Z' '0' - '9' ' ' '\t' '\n' ] +let regexp ident_cont = alpha | num | '_' +let regexp ident_cont' = ident_cont | tex_token +let regexp ident = (alpha ident_cont*) | ('_' ident_cont+) +let regexp ident' = ((alpha | tex_token) ident_cont'*) | ('_' ident_cont'+) let regexp lparen = [ '(' '[' '{' ] let regexp rparen = [ ')' ']' '}' ] -let regexp meta = '?' (alpha | num)+ +let regexp meta = '?' num +(* let regexp catchall = .* *) let keywords = Hashtbl.create 17 let _ = @@ -26,29 +56,57 @@ let error_at_end lexbuf msg = let return lexbuf token = (token, Ulexing.loc lexbuf) +(* +let parse_ext_ident ident = + let len = String.length ident in + let buf = Buffer.create len in + let in_tex_token = ref false in + let tex_token = Buffer.create 10 in + try + for i = 0 to len - 1 do + match ident.[i] with + | '\' when not !in_tex_token -> + if i < len - 1 && + in_tex_token := true + done + with Invalid_argument -> assert false + +let rec token' = lexer + | ident' -> + (try + let ident = parse_ext_ident (Ulexing.utf8_lexeme lexbuf) in + return lexbuf ("IDENT'", ident) + with Not_an_extended_ident -> + Ulexing.rollback lexbuf; + token lexbuf) + | _ -> + Ulexing.rollback lexbuf; + token lexbuf + +and token = lexer +*) let rec token = lexer -| blanks -> token lexbuf -| ident -> - let lexeme = Ulexing.utf8_lexeme lexbuf in - (try - return lexbuf (Hashtbl.find keywords lexeme) - with Not_found -> - return lexbuf ("IDENT", lexeme)) -| num -> return lexbuf ("INT", Ulexing.utf8_lexeme lexbuf) -| lparen -> return lexbuf ("LPAREN", Ulexing.utf8_lexeme lexbuf) -| rparen -> return lexbuf ("RPAREN", Ulexing.utf8_lexeme lexbuf) -| meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) -| symbol -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) -| tex_token -> - let macro = - Ulexing.utf8_sub_lexeme lexbuf 1 (Ulexing.lexeme_length lexbuf - 1) - in - (try - return lexbuf ("SYMBOL", Macro.expand macro) - with Macro.Macro_not_found _ -> - return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)) -| eof -> return lexbuf ("EOI", "") -| _ -> error lexbuf "Invalid character" + | blanks -> token lexbuf + | ident -> + let lexeme = Ulexing.utf8_lexeme lexbuf in + (try + return lexbuf (Hashtbl.find keywords lexeme) + with Not_found -> return lexbuf ("IDENT", lexeme)) + | num -> return lexbuf ("NUM", Ulexing.utf8_lexeme lexbuf) + | lparen -> return lexbuf ("LPAREN", Ulexing.utf8_lexeme lexbuf) + | rparen -> return lexbuf ("RPAREN", Ulexing.utf8_lexeme lexbuf) + | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) + | symbol -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) + | tex_token -> + let macro = + Ulexing.utf8_sub_lexeme lexbuf 1 (Ulexing.lexeme_length lexbuf - 1) + in + (try + return lexbuf ("SYMBOL", Macro.expand macro) + with Macro.Macro_not_found _ -> + return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)) + | eof -> return lexbuf ("EOI", "") + | _ -> error lexbuf "Invalid character" let tok_func stream = let lexbuf = Ulexing.from_utf8_stream stream in