(* Copyright (C) 2004, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * HELM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HELM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, * http://helm.cs.unibo.it/ *) exception Error of int * int * string let regexp alpha = [ 'a' - 'z' 'A' - 'Z' ] let regexp digit = [ '0' - '9' ] let regexp blank = [ ' ' '\t' '\n' ] let regexp blanks = blank+ let regexp num = digit+ let regexp ident = alpha (alpha | num)* let regexp symbol = [^ 'a' - 'z' 'A' - 'Z' '0' - '9' ' ' '\t' '\n' ] let regexp tex_token = '\\' alpha+ let regexp lparen = [ '(' '[' '{' ] let regexp rparen = [ ')' ']' '}' ] let regexp meta = '?' num let keywords = Hashtbl.create 17 let _ = List.iter (fun keyword -> Hashtbl.add keywords keyword ("", keyword)) [ "Prop"; "Type"; "Set"; "let"; "rec"; "using"; "match"; "with" ] let error lexbuf msg = raise (Error (Ulexing.lexeme_start lexbuf, Ulexing.lexeme_end lexbuf, msg)) let error_at_end lexbuf msg = raise (Error (Ulexing.lexeme_end lexbuf, Ulexing.lexeme_end lexbuf, msg)) let return lexbuf token = (token, Ulexing.loc lexbuf) let rec token = lexer | blanks -> token lexbuf | ident -> let lexeme = Ulexing.utf8_lexeme lexbuf in (try return lexbuf (Hashtbl.find keywords lexeme) with Not_found -> return lexbuf ("IDENT", lexeme)) | num -> return lexbuf ("INT", Ulexing.utf8_lexeme lexbuf) | lparen -> return lexbuf ("LPAREN", Ulexing.utf8_lexeme lexbuf) | rparen -> return lexbuf ("RPAREN", Ulexing.utf8_lexeme lexbuf) | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) | symbol -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) | tex_token -> let macro = Ulexing.utf8_sub_lexeme lexbuf 1 (Ulexing.lexeme_length lexbuf - 1) in (try return lexbuf ("SYMBOL", Macro.expand macro) with Macro.Macro_not_found _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)) | eof -> return lexbuf ("EOI", "") | _ -> error lexbuf "Invalid character" let tok_func stream = let lexbuf = Ulexing.from_utf8_stream stream in Token.make_stream_and_location (fun () -> try token lexbuf with | Ulexing.Error -> error_at_end lexbuf "Unexpected character" | Ulexing.InvalidCodepoint i -> error_at_end lexbuf "Invalid code point") let lex = { Token.tok_func = tok_func; Token.tok_using = (fun _ -> ()); Token.tok_removing = (fun _ -> ()); Token.tok_match = Token.default_match; Token.tok_text = Token.lexer_text; Token.tok_comm = None; }