From 0eb55693fa50c695866cf5205b04cdf9bf5e8e9d Mon Sep 17 00:00:00 2001
From: matitaweb
Date: Tue, 6 Sep 2011 12:24:51 +0000
Subject: [PATCH] Removed ghost copy of a MatitaScriptLexer (moved from components/content_pres/ to matita/)

---
 .../content_pres/matitaScriptLexer.ml | 354 ------------------
 1 file changed, 354 deletions(-)
 delete mode 100644 matitaB/components/content_pres/matitaScriptLexer.ml

diff --git a/matitaB/components/content_pres/matitaScriptLexer.ml b/matitaB/components/content_pres/matitaScriptLexer.ml
deleted file mode 100644
index e1ab73b53..000000000
--- a/matitaB/components/content_pres/matitaScriptLexer.ml
+++ /dev/null
@@ -1,354 +0,0 @@
-(* Copyright (C) 2005, HELM Team.
- *
- * This file is part of HELM, an Hypertextual, Electronic
- * Library of Mathematics, developed at the Computer Science
- * Department, University of Bologna, Italy.
- *
- * HELM is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * HELM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with HELM; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
- *
- * For details, see the HELM World-Wide-Web page,
- * http://helm.cs.unibo.it/
- *)
-
-(* $Id: cicNotationLexer.ml 11231 2011-03-30 11:52:27Z ricciott $ *)
-
-open Printf
-
-exception Error of int * int * string
-
-module StringSet = Set.Make(String)
-
-(* Lexer *)
-let regexp number = xml_digit+
-let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *)
-let regexp percentage =
-  ('-' | "") [ '0' - '9' ] + '%'
-let regexp floatwithunit =
-  ('-' | "") [ '0' - '9' ] + ["."] [ '0' - '9' ] + ([ 'a' - 'z' ] + | "" )
-let regexp color = "#" [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f'
-'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
-[ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
-
-  (* ZACK: breaks unicode's binder followed by an ascii letter without blank *)
-(* let regexp ident_letter = xml_letter *)
-
-let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ]
-
-  (* must be in sync with "is_ligature_char" below *)
-let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
-let regexp ligature = ligature_char ligature_char+
-
-let regexp we_proved = "we" utf8_blank+ "proved"
-let regexp we_have = "we" utf8_blank+ "have"
-let regexp let_rec = "let" utf8_blank+ "rec"
-let regexp let_corec = "let" utf8_blank+ "corec"
-let regexp ident_decoration = '\'' | '?' | '`'
-let regexp ident_cont = ident_letter | xml_digit | '_'
-let regexp ident_start = ident_letter
-let regexp ident = ident_letter ident_cont* ident_decoration*
-let regexp variable_ident = '_' '_' number
-let regexp pident = '_' ident
-
-let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
-
-let regexp uri =
-  ("cic:/" | "theory:/")              (* schema *)
-(*   ident ('/' ident)*                 |+ path +| *)
-  uri_step ('/' uri_step)*            (* path *)
-  ('.' ident)+                        (* ext *)
-(*   ("#xpointer(" number ('/' number)+ ")")?  (* xpointer *) *)
-  ("(" number (',' number)* ")")?     (* reference spec *)
-
-let regexp qstring = '"' [^ '"']* '"'
-let regexp hreftag = " Hashtbl.add ligatures ligature symbol)
-    [ ("->", <:unicode>); ("=>", <:unicode>);
-      (":=", <:unicode>);
-    ]
-
-let regexp nreference =
-  "cic:/"                             (* schema *)
-  uri_step ('/' uri_step)*            (* path *)
-  '.'
-  ( "dec"
-  | "def" "(" number ")"
-  | "fix" "(" number "," number "," number ")"
-  | "cfx" "(" number ")"
-  | "ind" "(" number "," number "," number ")"
-  | "con" "(" number "," number "," number ")") (* ext + reference *)
-
-let error lexbuf msg =
-  let begin_cnum, end_cnum = Ulexing.loc lexbuf in
-  raise (Error (begin_cnum, end_cnum, msg))
-let error_at_end lexbuf msg =
-  let begin_cnum, end_cnum = Ulexing.loc lexbuf in
-  raise (Error (begin_cnum, end_cnum, msg))
-
-let loc_of_buf lexbuf =
-  HExtlib.floc_of_loc (Ulexing.loc lexbuf)
-
-let return_with_loc token begin_cnum end_cnum =
-  let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in
-  token, flocation
-
-let return lexbuf token =
-  let begin_cnum, end_cnum = Ulexing.loc lexbuf in
-  return_with_loc token begin_cnum end_cnum
-
-let return_lexeme lexbuf name = return lexbuf (name, Ulexing.utf8_lexeme lexbuf)
-
-let return_symbol lexbuf s = return lexbuf ("SYMBOL", s)
-let return_eoi lexbuf = return lexbuf ("EOI", "")
-
-let remove_quotes s = String.sub s 1 (String.length s - 2)
-
-let mk_lexer token =
-  let tok_func stream =
-(*     let lexbuf = Ulexing.from_utf8_stream stream in *)
-(** XXX Obj.magic rationale.
- * The problem.
- * camlp5 constrains the tok_func field of Token.glexer to have type:
- *   Stream.t char -> (Stream.t 'te * flocation_function)
- * In order to use ulex we have (in theory) to instantiate a new lexbuf each
- * time a char Stream.t is passed, destroying the previous lexbuf which may
- * have consumed a character from the old stream which is lost forever :-(
- * The "solution".
- * Instead of passing to camlp5 a char Stream.t we pass a lexbuf, casting it to
- * char Stream.t with Obj.magic where needed.
- *)
-    let lexbuf = Obj.magic stream in
-    Token.make_stream_and_location
-      (fun () ->
-        try
-          token lexbuf
-        with
-        | Ulexing.Error -> error_at_end lexbuf "Unexpected character"
-        | Ulexing.InvalidCodepoint p ->
-            error_at_end lexbuf (sprintf "Invalid code point: %d" p))
-  in
-  {
-    Token.tok_func = tok_func;
-    Token.tok_using = (fun _ -> ());
-    Token.tok_removing = (fun _ -> ());
-    Token.tok_match = Token.default_match;
-    Token.tok_text = Token.lexer_text;
-    Token.tok_comm = None;
-  }
-
-let expand_macro lexbuf =
-  let macro =
-    Ulexing.utf8_sub_lexeme lexbuf 1 (Ulexing.lexeme_length lexbuf - 1)
-  in
-  try
-    ("SYMBOL", Utf8Macro.expand macro)
-  with Utf8Macro.Macro_not_found _ ->
-(* FG: unexpanded TeX macros are terminated by a space for rendering *)
-    "SYMBOL", (Ulexing.utf8_lexeme lexbuf ^ " ")
-
-let remove_quotes s = String.sub s 1 (String.length s - 2)
-let remove_left_quote s = String.sub s 1 (String.length s - 1)
-
-let rec level2_pattern_token_group counter buffer =
-  lexer
-  | end_group ->
-      if (counter > 0) then
-        Buffer.add_string buffer (Ulexing.utf8_lexeme lexbuf) ;
-      snd (Ulexing.loc lexbuf)
-  | begin_group ->
-      Buffer.add_string buffer (Ulexing.utf8_lexeme lexbuf) ;
-      ignore (level2_pattern_token_group (counter + 1) buffer lexbuf) ;
-      level2_pattern_token_group counter buffer lexbuf
-  | _ ->
-      Buffer.add_string buffer (Ulexing.utf8_lexeme lexbuf) ;
-      level2_pattern_token_group counter buffer lexbuf
-
-let read_unparsed_group token_name lexbuf =
-  let buffer = Buffer.create 16 in
-  let begin_cnum, _ = Ulexing.loc lexbuf in
-  let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
-  return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
-
-let handle_keywords lexbuf k name =
-  let s = Ulexing.utf8_lexeme lexbuf in
-  if k s then
-    return lexbuf ("", s)
-  else
-    return lexbuf (name, s)
-;;
-
-let rec level2_meta_token =
-  lexer
-  | utf8_blank+ -> level2_meta_token lexbuf
-  | hreftag -> return lexbuf ("ATAG","")
-  | hrefclose -> return lexbuf ("ATAGEND","")
-  | ident ->
-      handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT"
-  | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
-  | pident ->
-      handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "PIDENT"
-  | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf
-  | ast_ident ->
-      return lexbuf ("UNPARSED_AST",
-        remove_left_quote (Ulexing.utf8_lexeme lexbuf))
-  | ast_csymbol ->
-      return lexbuf ("UNPARSED_AST",
-        remove_left_quote (Ulexing.utf8_lexeme lexbuf))
-  | eof -> return_eoi lexbuf
-
-let rec comment_token acc depth =
-  lexer
-  | beginnote ->
-      let acc = acc ^ Ulexing.utf8_lexeme lexbuf in
-      comment_token acc (depth + 1) lexbuf
-  | endcomment ->
-      let acc = acc ^ Ulexing.utf8_lexeme lexbuf in
-      if depth = 0
-      then acc
-      else comment_token acc (depth - 1) lexbuf
-  | _ ->
-      let acc = acc ^ Ulexing.utf8_lexeme lexbuf in
-      comment_token acc depth lexbuf
-
-  (** @param k continuation to be invoked when no ligature has been found *)
-let ligatures_token k =
-  lexer
-  | ligature ->
-      let lexeme = Ulexing.utf8_lexeme lexbuf in
-      (match List.rev (Hashtbl.find_all ligatures lexeme) with
-      | [] -> (* ligature not found, rollback and try default lexer *)
-          Ulexing.rollback lexbuf;
-          k lexbuf
-      | default_lig :: _ -> (* ligatures found, use the default one *)
-          return_symbol lexbuf default_lig)
-  | eof -> return_eoi lexbuf
-  | _ -> (* not a ligature, rollback and try default lexer *)
-      Ulexing.rollback lexbuf;
-      k lexbuf
-
-module LocalizeOD =
-  struct
-    type t = Stdpp.location
-    let compare = Pervasives.compare
-  end
-
-let update_table loc desc href loctable =
-  if desc <> None || href <> None
-  then
-    (let s,e = HExtlib.loc_of_floc loc in
-     prerr_endline (Printf.sprintf "*** [%d,%d] \"%s\",\"%s\""
-       s e (so_pp href) (so_pp desc));
-     LocalizeEnv.add loc (href,desc) loctable)
-  else loctable
-;;
-
-let get_hot_spots =
-  let rec aux loc1 desc href =
-    lexer
-    | hreftag -> aux_in_tag (Ulexing.loc lexbuf) None None lexbuf
-    | hrefclose ->
-        try
-          let loc1 = HExtlib.floc_of_loc (HExtlib.unopt loc1) in
-          let loc2 = HExtlib.floc_of_loc (Ulexing.loc lexbuf) in
-          (loc1,loc2,href,desc) :: aux None None None lexbuf
-        with Failure _ -> aux None None None lexbuf
-    | beginnote -> (* FIXME: handle comments as in smallLexer *)
-        let _comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in
-        (* let comment =
-             Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
-           in
-           return lexbuf ("NOTE", comment) *)
-        ligatures_token (aux desc href) lexbuf
-    | begincomment -> return lexbuf ("BEGINCOMMENT","")
-    | endcomment -> return lexbuf ("ENDCOMMENT","")
-    | eof -> []
-    | _ -> aux loc1 desc href lexbuf
-  and aux_in_tag loc1 desc href = lexer
-    | utf8_blank+ -> aux_in_tag loc1 desc href lexbuf
-    | href ->
-        aux_in_tag loc1 desc
-          (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7)))
-          lexbuf
-    | hreftitle ->
-        aux_in_tag loc1
-          (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8)))
-          href lexbuf
-    | ">" ->
-        let merge (a,b) (c,d) = (a,d) in
-        aux (Some (merge loc1 (Ulexing.loc lexbuf))) desc href lexbuf
-    | _ -> aux None None None lexbuf
-  in aux None None None
-- 
2.39.2
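
Note on the ligature handling in the removed copy (and in the surviving matita/ copy): ligatures_token collects every binding for the current lexeme with Hashtbl.find_all and, since find_all returns the most recently added binding first, reverses the list so that the first-registered symbol wins as the default. A minimal self-contained sketch of that lookup, using made-up replacement strings instead of the file's <:unicode<...>> quotations:

let () =
  (* hypothetical table contents; the real file registers camlp5
     <:unicode<...>> quotations here *)
  let ligatures : (string, string) Hashtbl.t = Hashtbl.create 23 in
  List.iter (fun (ligature, symbol) -> Hashtbl.add ligatures ligature symbol)
    [ ("->", "TO"); ("->", "RIGHTARROW"); (":=", "DEF") ];
  (* same lookup as ligatures_token: find_all returns newest-first,
     so List.rev makes the first-registered symbol the default *)
  match List.rev (Hashtbl.find_all ligatures "->") with
  | [] -> print_endline "not a ligature: rollback and use the fallback lexer"
  | default_lig :: _ -> print_endline default_lig  (* prints "TO" *)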
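
The "XXX Obj.magic rationale" comment explains why mk_lexer expects to be handed a Ulexing.lexbuf disguised as the char Stream.t that camlp5's Token.glexer interface demands. A sketch of the intended call pattern, assuming camlp5 and ulex are available and that the file's mk_lexer and level2_meta_token are in scope; the helper name tokenize_string is illustrative, not part of the file:

let level2_meta_lexer = mk_lexer level2_meta_token

let tokenize_string s =
  (* build one lexbuf for the whole input ... *)
  let lexbuf = Ulexing.from_utf8_string s in
  (* ... and pass it where camlp5 expects a char Stream.t; tok_func
     immediately casts it back with Obj.magic, as the comment describes *)
  let token_stream, _loc_func =
    level2_meta_lexer.Token.tok_func (Obj.magic lexbuf) in
  token_stream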
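
Similarly, get_hot_spots scans a script and returns one (start, stop, href, title) tuple per hyperlink anchor it finds, with href and title as string options. A hypothetical usage sketch; the anchor syntax in the input string is an assumption, since the hreftag/href/hreftitle regexp definitions were partly lost above:

let () =
  let text = "lemma foo: <A href=\"cic:/matita/basics/logic/True.ind(1)\">True</A>." in
  let spots = get_hot_spots (Ulexing.from_utf8_string text) in
  List.iter
    (fun (_start_loc, _stop_loc, href, title) ->
      (* print the recovered hyperlink attributes, "-" when absent *)
      Printf.printf "href=%s title=%s\n"
        (match href with Some s -> s | None -> "-")
        (match title with Some s -> s | None -> "-"))
    spots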