(* Copyright (C) 2004, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * HELM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HELM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, * http://helm.cs.unibo.it/ *) exception Error of int * int * string exception Not_an_extended_ident let regexp alpha = [ 'a' - 'z' 'A' - 'Z' ] let regexp digit = [ '0' - '9' ] let regexp blank = [ ' ' '\t' '\n' ] let regexp blanks = blank+ let regexp num = digit+ let regexp tex_token = '\\' alpha+ let regexp symbol = [^ 'a' - 'z' 'A' - 'Z' '0' - '9' ' ' '\t' '\n' ] let regexp ident_cont = alpha | num | '_' | '\'' let regexp ident_cont' = ident_cont | tex_token let regexp ident = (alpha ident_cont*) | ('_' ident_cont+) let regexp ident' = ((alpha | tex_token) ident_cont'*) | ('_' ident_cont'+) let regexp paren = [ '(' '[' '{' ')' ']' '}' ] let regexp implicit = '?' let regexp meta = '?' num let regexp qstring = '"' [^ '"']* '"' let regexp uri = (* schema *) (* path *) (* ext *) (* xpointer *) ("cic:/" | "theory:/") ident ('/' ident)* ('.' ident)+ ('#' num ('/' num)*)? (* let regexp catchall = .* *) let keywords = Hashtbl.create 17 let _ = List.iter (fun keyword -> Hashtbl.add keywords keyword ("", keyword)) [ "Prop"; "Type"; "Set"; "let"; "rec"; "using"; "match"; "with" ] let error lexbuf msg = raise (Error (Ulexing.lexeme_start lexbuf, Ulexing.lexeme_end lexbuf, msg)) let error_at_end lexbuf msg = raise (Error (Ulexing.lexeme_end lexbuf, Ulexing.lexeme_end lexbuf, msg)) let return lexbuf token = (token, Ulexing.loc lexbuf) (* let parse_ext_ident ident = let len = String.length ident in let buf = Buffer.create len in let in_tex_token = ref false in let tex_token = Buffer.create 10 in try for i = 0 to len - 1 do match ident.[i] with | '\' when not !in_tex_token -> if i < len - 1 && in_tex_token := true done with Invalid_argument -> assert false let rec token' = lexer | ident' -> (try let ident = parse_ext_ident (Ulexing.utf8_lexeme lexbuf) in return lexbuf ("IDENT'", ident) with Not_an_extended_ident -> Ulexing.rollback lexbuf; token lexbuf) | _ -> Ulexing.rollback lexbuf; token lexbuf and token = lexer *) let rec token = lexer | blanks -> token lexbuf | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf) | ident -> let lexeme = Ulexing.utf8_lexeme lexbuf in (try return lexbuf (Hashtbl.find keywords lexeme) with Not_found -> return lexbuf ("IDENT", lexeme)) | num -> return lexbuf ("NUM", Ulexing.utf8_lexeme lexbuf) | paren -> return lexbuf ("PAREN", Ulexing.utf8_lexeme lexbuf) | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf) | implicit -> return lexbuf ("IMPLICIT", Ulexing.utf8_lexeme lexbuf) | qstring -> let lexeme = Ulexing.utf8_lexeme lexbuf in let s = String.sub lexeme 1 (String.length lexeme - 2) in return lexbuf ("QSTRING", s) | symbol -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf) | tex_token -> let macro = Ulexing.utf8_sub_lexeme lexbuf 1 (Ulexing.lexeme_length lexbuf - 1) in (try return lexbuf ("SYMBOL", CicTextualParser2Macro.expand macro) with CicTextualParser2Macro.Macro_not_found _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)) | eof -> return lexbuf ("EOI", "") | _ -> error lexbuf "Invalid character" let tok_func stream = let lexbuf = Ulexing.from_utf8_stream stream in Token.make_stream_and_location (fun () -> try token lexbuf with | Ulexing.Error -> error_at_end lexbuf "Unexpected character" | Ulexing.InvalidCodepoint i -> error_at_end lexbuf "Invalid code point") let cic_lexer = { Token.tok_func = tok_func; Token.tok_using = (fun _ -> ()); Token.tok_removing = (fun _ -> ()); Token.tok_match = Token.default_match; Token.tok_text = Token.lexer_text; Token.tok_comm = None; }