X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fsoftware%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=bbc901dab7db3e2f139e37e6e65639efe1d2abd4;hb=279b11c00cdaacb4858e1c8dc6d05ea631bc1358;hp=67340d37a255662b1270a26ce757c18d82768de7;hpb=5376f7de4cc1b0687a197b9e42b10337b95f5742;p=helm.git

diff --git a/helm/software/components/content_pres/cicNotationLexer.ml b/helm/software/components/content_pres/cicNotationLexer.ml
index 67340d37a..bbc901dab 100644
--- a/helm/software/components/content_pres/cicNotationLexer.ml
+++ b/helm/software/components/content_pres/cicNotationLexer.ml
@@ -30,6 +30,14 @@ open Printf
 exception Error of int * int * string
 
 let regexp number = xml_digit+
+let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *)
+let regexp percentage =
+  ('-' | "") [ '0' - '9' ] + '%'
+let regexp floatwithunit =
+  ('-' | "") [ '0' - '9' ] + ["."] [ '0' - '9' ] + ([ 'a' - 'z' ] + | "" )
+let regexp color = "#" [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f'
+'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
+[ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
 
 (* ZACK: breaks unicode's binder followed by an ascii letter without blank *)
 (* let regexp ident_letter = xml_letter *)
@@ -40,15 +48,10 @@ let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ]
 let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
 let regexp ligature = ligature_char ligature_char+
 
-let is_ligature_char =
-  (* must be in sync with "regexp ligature_char" above *)
-  let chars = "'`~!?@*()[]<>-+=|:;.,/\"" in
-  (fun char ->
-    (try
-      ignore (String.index chars char);
-      true
-    with Not_found -> false))
-
+let regexp we_proved = "we" utf8_blank+ "proved"
+let regexp we_have = "we" utf8_blank+ "have"
+let regexp let_rec = "let" utf8_blank+ "rec"
+let regexp let_corec = "let" utf8_blank+ "corec"
 let regexp ident_decoration = '\'' | '?' | '`'
 let regexp ident_cont = ident_letter | xml_digit | '_'
 let regexp ident = ident_letter ident_cont* ident_decoration*
@@ -75,7 +78,7 @@ let regexp meta_ident = "$" ident
 let regexp meta_anonymous = "$_"
 let regexp qstring = '"' [^ '"']* '"'
 
-let regexp begincomment = "(**" xml_blank
+let regexp begincomment = "(**" utf8_blank
 let regexp beginnote = "(*"
 let regexp endcomment = "*)"
 (* let regexp comment_char = [^'*'] | '*'[^')']
@@ -93,11 +96,11 @@ let level1_keywords =
   "break";
   "list0"; "list1"; "sep";
   "opt";
-  "term"; "ident"; "number"
+  "term"; "ident"; "number"; "mstyle" ; "mpadded"
 ] @ level1_layouts
 
 let level2_meta_keywords =
   [ "if"; "then"; "else";
     "fold"; "left"; "right"; "rec";
     "fail";
     "default";
@@ -105,27 +108,35 @@ level2_meta_keywords =
   ]
 
 (* (string, unit) Hashtbl.t, to exploit multiple bindings *)
-let level2_ast_keywords = Hashtbl.create 23
-let _ =
-  List.iter (fun k -> Hashtbl.add level2_ast_keywords k ())
-  [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match";
-    "with"; "in"; "and"; "to"; "as"; "on"; "return" ]
+let initial_level2_ast_keywords () = Hashtbl.create 23;;
+
+let level2_ast_keywords = ref (initial_level2_ast_keywords ())
+
+let initialize_keywords () =
+  List.iter (fun k -> Hashtbl.add !level2_ast_keywords k ())
+  [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
+    "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
+;;
+
+let _ = initialize_keywords ();;
 
-let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
-let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
+let add_level2_ast_keyword k = Hashtbl.add !level2_ast_keywords k ()
+let remove_level2_ast_keyword k = Hashtbl.remove !level2_ast_keywords k
 
 (* (string, int) Hashtbl.t, with multiple bindings.
  * int is the unicode codepoint *)
 let ligatures = Hashtbl.create 23
+
 let _ =
   List.iter
     (fun (ligature, symbol) -> Hashtbl.add ligatures ligature symbol)
     [ ("->", <:unicode>);   ("=>", <:unicode>);
      ("<=", <:unicode>);   (">=", <:unicode>);
      ("<>", <:unicode>);   (":=", <:unicode>);
+     ("==", <:unicode>);
    ]
 
-let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ]+
+let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
 
 let regexp uri =
   ("cic:/" | "theory:/")              (* schema *)
@@ -142,14 +153,8 @@ let error_at_end lexbuf msg =
   raise (Error (begin_cnum, end_cnum, msg))
 
 let return_with_loc token begin_cnum end_cnum =
-  (* TODO handle line/column numbers *)
-  let flocation_begin =
-    { Lexing.pos_fname = "";
-      Lexing.pos_lnum = -1; Lexing.pos_bol = -1;
-      Lexing.pos_cnum = begin_cnum }
-  in
-  let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in
-  (token, (flocation_begin, flocation_end))
+  let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in
+  token, flocation
 
 let return lexbuf token =
   let begin_cnum, end_cnum = Ulexing.loc lexbuf in
@@ -167,17 +172,17 @@ let mk_lexer token =
 (*     let lexbuf = Ulexing.from_utf8_stream stream in *)
 (** XXX Obj.magic rationale.
  * The problem.
- * camlp4 constraints the tok_func field of Token.glexer to have type:
+ * camlp5 constraints the tok_func field of Token.glexer to have type:
  *   Stream.t char -> (Stream.t 'te * flocation_function)
 * In order to use ulex we have (in theory) to instantiate a new lexbuf each
 * time a char Stream.t is passed, destroying the previous lexbuf which may
 * have consumed a character from the old stream which is lost forever :-(
 * The "solution".
- * Instead of passing to camlp4 a char Stream.t we pass a lexbuf, casting it to
+ * Instead of passing to camlp5 a char Stream.t we pass a lexbuf, casting it to
 * char Stream.t with Obj.magic where needed.
 *)
     let lexbuf = Obj.magic stream in
-    Token.make_stream_and_flocation
+    Token.make_stream_and_location
       (fun () ->
         try
          token lexbuf
@@ -228,7 +233,7 @@ let read_unparsed_group token_name lexbuf =
 
 let rec level2_meta_token =
   lexer
-  | xml_blank+ -> level2_meta_token lexbuf
+  | utf8_blank+ -> level2_meta_token lexbuf
   | ident ->
      let s = Ulexing.utf8_lexeme lexbuf in
      begin
@@ -278,7 +283,11 @@ let rec ligatures_token k =
 
 and level2_ast_token =
   lexer
-  | xml_blank+ -> ligatures_token level2_ast_token lexbuf
+  | let_rec -> return lexbuf ("LETREC","")
+  | let_corec -> return lexbuf ("LETCOREC","")
+  | we_proved -> return lexbuf ("WEPROVED","")
+  | we_have -> return lexbuf ("WEHAVE","")
+  | utf8_blank+ -> ligatures_token level2_ast_token lexbuf
   | meta ->
      let s = Ulexing.utf8_lexeme lexbuf in
      return lexbuf ("META", String.sub s 1 (String.length s - 1))
@@ -286,7 +295,7 @@
   | placeholder -> return lexbuf ("PLACEHOLDER", "")
   | ident ->
      let lexeme = Ulexing.utf8_lexeme lexbuf in
-     if Hashtbl.mem level2_ast_keywords lexeme then
+     if Hashtbl.mem !level2_ast_keywords lexeme then
        return lexbuf ("", lexeme)
      else
        return lexbuf ("IDENT", lexeme)
@@ -319,7 +328,7 @@
 
 and level1_pattern_token =
   lexer
-  | xml_blank+ -> ligatures_token level1_pattern_token lexbuf
+  | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
   | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
   | ident ->
      let s = Ulexing.utf8_lexeme lexbuf in
@@ -329,6 +338,11 @@ and level1_pattern_token =
      else
        return lexbuf ("IDENT", s)
      end
+  | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
+  | percentage ->
+      return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
+  | floatwithunit ->
+      return lexbuf ("FLOATWITHUNIT", Ulexing.utf8_lexeme lexbuf)
   | tex_token -> return lexbuf (expand_macro lexbuf)
   | qkeyword ->
      return lexbuf ("QKEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf))
@@ -342,14 +356,40 @@ let level2_ast_token = ligatures_token level2_ast_token
 
 (* API implementation *)
 
-let level1_pattern_lexer = mk_lexer level1_pattern_token
-let level2_ast_lexer = mk_lexer level2_ast_token
-let level2_meta_lexer = mk_lexer level2_meta_token
-
-let lookup_ligatures lexeme =
-  try
-    if lexeme.[0] = '\\'
-    then [ Utf8Macro.expand (String.sub lexeme 1 (String.length lexeme - 1)) ]
-    else List.rev (Hashtbl.find_all ligatures lexeme)
-  with Invalid_argument _ | Utf8Macro.Macro_not_found _ -> []
+let initial_level1_pattern_lexer () = mk_lexer level1_pattern_token
+let initial_level2_ast_lexer () = mk_lexer level2_ast_token
+let initial_level2_meta_lexer () = mk_lexer level2_meta_token
+
+
+let level1_pattern_lexer_ref = ref (initial_level1_pattern_lexer ())
+let level2_ast_lexer_ref = ref (initial_level2_ast_lexer ())
+let level2_meta_lexer_ref = ref (initial_level2_meta_lexer ())
+
+let level1_pattern_lexer () = !level1_pattern_lexer_ref
+let level2_ast_lexer () = !level2_ast_lexer_ref
+let level2_meta_lexer () = !level2_meta_lexer_ref
+
+let history = ref [];;
+
+let push () =
+  history :=
+   (!level2_ast_keywords,!level1_pattern_lexer_ref,
+    !level2_ast_lexer_ref,!level2_meta_lexer_ref) :: !history;
+  level2_ast_keywords := initial_level2_ast_keywords ();
+  initialize_keywords ();
+  level1_pattern_lexer_ref := initial_level1_pattern_lexer ();
+  level2_ast_lexer_ref := initial_level2_ast_lexer ();
+  level2_meta_lexer_ref := initial_level2_meta_lexer ();
+;;
+
+let pop () =
+  match !history with
+  | [] -> assert false
+  | (kwd,pl,al,ml) :: tl ->
+      level2_ast_keywords := kwd;
+      level1_pattern_lexer_ref := pl;
+      level2_ast_lexer_ref := al;
+      level2_meta_lexer_ref := ml;
+      history := tl
+;;
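
Taken as a whole, the patch turns the three notation lexers and the level-2 AST keyword table from fixed globals into re-initializable state: the lexers are now built by the initial_* constructors, kept in references, read through the functions level1_pattern_lexer (), level2_ast_lexer () and level2_meta_lexer (), and push ()/pop () save and restore the whole bundle (keyword table included) on an internal history stack. The sketch below is not part of the patch; it is a minimal illustration of how client code might use the new interface, assuming these values are exported by cicNotationLexer.mli. The wrapper function and the extra keyword are hypothetical.

(* Hypothetical helper: run [f] against a fresh lexer state in which one
 * extra level-2 AST keyword is registered, then restore the previous
 * state, also on failure.  Uses only push, pop, add_level2_ast_keyword
 * and level2_ast_lexer from the patched CicNotationLexer. *)
let with_extra_keyword kw f =
  CicNotationLexer.push ();                    (* save keywords and lexers *)
  CicNotationLexer.add_level2_ast_keyword kw;  (* only affects the fresh state *)
  let restore () = CicNotationLexer.pop () in  (* roll back to the saved state *)
  try
    let res = f (CicNotationLexer.level2_ast_lexer ()) in
    restore ();
    res
  with exn -> restore (); raise exn

Because each history entry also stores !level2_ast_keywords, pop () rolls back keywords registered through add_level2_ast_keyword together with the lexers themselves, which is what makes a scoped helper like the one above possible.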