X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;ds=sidebyside;f=helm%2Fsoftware%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=21031d002fc10fba43eb915623f054d45743ecd0;hb=f9abd21eb0d26cf9b632af4df819225be4d091e3;hp=4221417a0183dbf19a2e54cf140a802fdaad9ba4;hpb=3e1e59644a24ed855a7f21bf9eab76f96577fd17;p=helm.git

diff --git a/helm/software/components/content_pres/cicNotationLexer.ml b/helm/software/components/content_pres/cicNotationLexer.ml
index 4221417a0..21031d002 100644
--- a/helm/software/components/content_pres/cicNotationLexer.ml
+++ b/helm/software/components/content_pres/cicNotationLexer.ml
@@ -31,8 +31,10 @@ exception Error of int * int * string
 let regexp number = xml_digit+
 let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *)
 
+let regexp percentage =
+  ('-' | "") [ '0' - '9' ] + '%'
 let regexp floatwithunit =
-  [ '0' - '9' ] + ["."] [ '0' - '9' ] + ([ 'a' - 'z' ] + | "" )
+  ('-' | "") [ '0' - '9' ] + ["."] [ '0' - '9' ] + ([ 'a' - 'z' ] + | "" )
 let regexp color = "#" [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
    [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
    [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
@@ -46,29 +48,25 @@ let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ]
 let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
 let regexp ligature = ligature_char ligature_char+
 
-let is_ligature_char =
-  (* must be in sync with "regexp ligature_char" above *)
-  let chars = "'`~!?@*()[]<>-+=|:;.,/\"" in
-  (fun char ->
-    (try
-      ignore (String.index chars char);
-      true
-    with Not_found -> false))
-
 let regexp we_proved = "we" utf8_blank+ "proved"
 let regexp we_have = "we" utf8_blank+ "have"
 let regexp let_rec = "let" utf8_blank+ "rec"
 let regexp let_corec = "let" utf8_blank+ "corec"
+let regexp nlet_rec = "nlet" utf8_blank+ "rec"
+let regexp nlet_corec = "nlet" utf8_blank+ "corec"
 let regexp ident_decoration = '\'' | '?' | '`'
 let regexp ident_cont = ident_letter | xml_digit | '_'
-let regexp ident = ident_letter ident_cont* ident_decoration*
+let regexp ident_start = ident_letter
+let regexp ident = ident_letter ident_cont* ident_decoration*
+let regexp variable_ident = '_' '_' number
+let regexp pident = '_' ident
 
 let regexp tex_token = '\\' ident
 
 let regexp delim_begin = "\\["
 let regexp delim_end = "\\]"
 
-let regexp qkeyword = "'" ident "'"
+let regexp qkeyword = "'" ( ident | pident ) "'"
 
 let regexp implicit = '?'
 let regexp placeholder = '%'
@@ -95,7 +93,8 @@ let level1_layouts =
   [ "sub"; "sup";
     "below"; "above";
     "over"; "atop"; "frac";
-    "sqrt"; "root"
+    "sqrt"; "root"; "mstyle" ; "mpadded"; "maction"
+
   ]
 
 let level1_keywords =
@@ -103,11 +102,11 @@ let level1_keywords =
     "break";
     "list0"; "list1"; "sep";
     "opt";
-    "term"; "ident"; "number"; "mstyle"
+    "term"; "ident"; "number";
   ] @ level1_layouts
 
 let level2_meta_keywords =
   [ "if"; "then"; "else";
     "fold"; "left"; "right"; "rec";
     "fail"; "default";
@@ -115,25 +114,30 @@ let level2_meta_keywords =
   ]
 
 (* (string, unit) Hashtbl.t, to exploit multiple bindings *)
-let level2_ast_keywords = Hashtbl.create 23
-let _ =
-  List.iter (fun k -> Hashtbl.add level2_ast_keywords k ())
+let initial_level2_ast_keywords () = Hashtbl.create 23;;
+
+let level2_ast_keywords = ref (initial_level2_ast_keywords ())
+
+let initialize_keywords () =
+  List.iter (fun k -> Hashtbl.add !level2_ast_keywords k ())
   [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
     "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
+;;
+
+let _ = initialize_keywords ();;
 
-let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
-let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
+let add_level2_ast_keyword k = Hashtbl.add !level2_ast_keywords k ()
+let remove_level2_ast_keyword k = Hashtbl.remove !level2_ast_keywords k
 
 (* (string, int) Hashtbl.t, with multiple bindings.
  * int is the unicode codepoint *)
 let ligatures = Hashtbl.create 23
+
 let _ =
   List.iter
     (fun (ligature, symbol) -> Hashtbl.add ligatures ligature symbol)
     [ ("->", <:unicode>); ("=>", <:unicode>);
-      ("<=", <:unicode>); (">=", <:unicode>);
-      ("<>", <:unicode>); (":=", <:unicode>);
-      ("==", <:unicode>);
+      (":=", <:unicode>);
     ]
 
 let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
@@ -145,6 +149,17 @@ let regexp uri =
   ('.' ident)+ (* ext *)
   ("#xpointer(" number ('/' number)+ ")")? (* xpointer *)
 
+let regexp nreference =
+  "cic:/"                      (* schema *)
+  uri_step ('/' uri_step)*     (* path *)
+  '.'
+  ( "dec"
+  | "def" "(" number ")"
+  | "fix" "(" number "," number "," number ")"
+  | "cfx" "(" number ")"
+  | "ind" "(" number "," number "," number ")"
+  | "con" "(" number "," number "," number ")") (* ext + reference *)
+
 let error lexbuf msg =
   let begin_cnum, end_cnum = Ulexing.loc lexbuf in
   raise (Error (begin_cnum, end_cnum, msg))
@@ -206,7 +221,9 @@ let expand_macro lexbuf =
   in
   try
     ("SYMBOL", Utf8Macro.expand macro)
-  with Utf8Macro.Macro_not_found _ -> "SYMBOL", Ulexing.utf8_lexeme lexbuf
+  with Utf8Macro.Macro_not_found _ ->
+(* FG: unexpanded TeX macros are terminated by a space for rendering *)
+       "SYMBOL", (Ulexing.utf8_lexeme lexbuf ^ " ")
 
 let remove_quotes s = String.sub s 1 (String.length s - 2)
 let remove_left_quote s = String.sub s 1 (String.length s - 1)
@@ -231,17 +248,22 @@ let read_unparsed_group token_name lexbuf =
   let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
     return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
 
+let handle_keywords lexbuf k name =
+  let s = Ulexing.utf8_lexeme lexbuf in
+  if k s then
+    return lexbuf ("", s)
+  else
+    return lexbuf (name, s)
+;;
+
 let rec level2_meta_token =
   lexer
   | utf8_blank+ -> level2_meta_token lexbuf
   | ident ->
-      let s = Ulexing.utf8_lexeme lexbuf in
-      begin
-        if List.mem s level2_meta_keywords then
-          return lexbuf ("", s)
-        else
-          return lexbuf ("IDENT", s)
-      end
+      handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT"
+  | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+  | pident ->
+      handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "PIDENT"
   | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf
   | ast_ident ->
       return lexbuf ("UNPARSED_AST",
@@ -285,6 +307,8 @@ and level2_ast_token =
   lexer
   | let_rec -> return lexbuf ("LETREC","")
   | let_corec -> return lexbuf ("LETCOREC","")
+  | nlet_rec -> return lexbuf ("NLETREC","")
+  | nlet_corec -> return lexbuf ("NLETCOREC","")
   | we_proved -> return lexbuf ("WEPROVED","")
   | we_have -> return lexbuf ("WEHAVE","")
   | utf8_blank+ -> ligatures_token level2_ast_token lexbuf
@@ -293,14 +317,12 @@ and level2_ast_token =
       return lexbuf ("META", String.sub s 1 (String.length s - 1))
   | implicit -> return lexbuf ("IMPLICIT", "")
  | placeholder -> return lexbuf ("PLACEHOLDER", "")
-  | ident ->
-      let lexeme = Ulexing.utf8_lexeme lexbuf in
-      if Hashtbl.mem level2_ast_keywords lexeme then
-        return lexbuf ("", lexeme)
-      else
-        return lexbuf ("IDENT", lexeme)
+  | ident -> handle_keywords lexbuf (Hashtbl.mem !level2_ast_keywords) "IDENT"
+  | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+  | pident -> handle_keywords lexbuf (Hashtbl.mem !level2_ast_keywords) "PIDENT"
   | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
   | tex_token -> return lexbuf (expand_macro lexbuf)
+  | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
   | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
   | qstring ->
       return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf))
@@ -330,15 +352,12 @@ and level1_pattern_token =
   lexer
   | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
   | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
-  | ident ->
-      let s = Ulexing.utf8_lexeme lexbuf in
-      begin
-        if List.mem s level1_keywords then
-          return lexbuf ("", s)
-        else
-          return lexbuf ("IDENT", s)
-      end
+  | ident ->handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
+  | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+  | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
   | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
+  | percentage ->
+      return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
   | floatwithunit ->
       return lexbuf ("FLOATWITHUNIT", Ulexing.utf8_lexeme lexbuf)
   | tex_token -> return lexbuf (expand_macro lexbuf)
@@ -354,14 +373,40 @@ let level2_ast_token = ligatures_token level2_ast_token
 
 (* API implementation *)
 
-let level1_pattern_lexer = mk_lexer level1_pattern_token
-let level2_ast_lexer = mk_lexer level2_ast_token
-let level2_meta_lexer = mk_lexer level2_meta_token
-
-let lookup_ligatures lexeme =
-  try
-    if lexeme.[0] = '\\'
-    then [ Utf8Macro.expand (String.sub lexeme 1 (String.length lexeme - 1)) ]
-    else List.rev (Hashtbl.find_all ligatures lexeme)
-  with Invalid_argument _ | Utf8Macro.Macro_not_found _ -> []
+let initial_level1_pattern_lexer () = mk_lexer level1_pattern_token
+let initial_level2_ast_lexer () = mk_lexer level2_ast_token
+let initial_level2_meta_lexer () = mk_lexer level2_meta_token
+
+
+let level1_pattern_lexer_ref = ref (initial_level1_pattern_lexer ())
+let level2_ast_lexer_ref = ref (initial_level2_ast_lexer ())
+let level2_meta_lexer_ref = ref (initial_level2_meta_lexer ())
+
+let level1_pattern_lexer () = !level1_pattern_lexer_ref
+let level2_ast_lexer () = !level2_ast_lexer_ref
+let level2_meta_lexer () = !level2_meta_lexer_ref
+
+let history = ref [];;
+
+let push () =
+  history :=
+    (!level2_ast_keywords,!level1_pattern_lexer_ref,
+     !level2_ast_lexer_ref,!level2_meta_lexer_ref) :: !history;
+  level2_ast_keywords := initial_level2_ast_keywords ();
+  initialize_keywords ();
+  level1_pattern_lexer_ref := initial_level1_pattern_lexer ();
+  level2_ast_lexer_ref := initial_level2_ast_lexer ();
+  level2_meta_lexer_ref := initial_level2_meta_lexer ();
+;;
+
+let pop () =
+  match !history with
+  | [] -> assert false
+  | (kwd,pl,al,ml) :: tl ->
+      level2_ast_keywords := kwd;
+      level1_pattern_lexer_ref := pl;
+      level2_ast_lexer_ref := al;
+      level2_meta_lexer_ref := ml;
+      history := tl
+;;
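
Usage sketch (not part of the patch above): the final hunks replace the fixed keyword table and the three statically built lexers with ref cells plus a push/pop history, so a caller can install extra level-2 AST keywords and later roll the lexer state back. The snippet below is a minimal, hypothetical illustration of that intent; it assumes the module is exposed as CicNotationLexer and relies only on the signatures visible in this diff (push, pop : unit -> unit; add_level2_ast_keyword : string -> unit; level2_ast_lexer : unit -> lexer). The helper name with_extra_keyword and the callback f are invented for the example.

(* Hypothetical caller, not part of the patch: snapshot the lexer state,
   extend the keyword table, run a computation, then restore the state. *)
let with_extra_keyword kwd f =
  CicNotationLexer.push ();                    (* save keyword table and lexers *)
  CicNotationLexer.add_level2_ast_keyword kwd; (* keywords are looked up at token time *)
  let lexer = CicNotationLexer.level2_ast_lexer () in
  let result =
    try f lexer
    with exn -> CicNotationLexer.pop (); raise exn  (* restore state on failure *)
  in
  CicNotationLexer.pop ();                     (* restore the previous state *)
  result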