X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Fcic_notation%2FcicNotationLexer.ml;h=33fb8fd78494085a819c9d65fb11e2020a654c42;hb=4167cea65ca58897d1a3dbb81ff95de5074700cc;hp=13a8b3883dd86addc046c1e17ab308e0f3897b2a;hpb=9e8c5d2163e701413517153f00a52dac1cd31ecd;p=helm.git diff --git a/helm/ocaml/cic_notation/cicNotationLexer.ml b/helm/ocaml/cic_notation/cicNotationLexer.ml index 13a8b3883..33fb8fd78 100644 --- a/helm/ocaml/cic_notation/cicNotationLexer.ml +++ b/helm/ocaml/cic_notation/cicNotationLexer.ml @@ -34,9 +34,19 @@ let regexp number = xml_digit+ let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ] + (* must be in sync with "is_ligature_char" below *) let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ] let regexp ligature = ligature_char ligature_char+ +let is_ligature_char = + (* must be in sync with "regexp ligature_char" above *) + let chars = "'`~!?@*()[]<>-+=|:;.,/\"" in + (fun char -> + (try + ignore (String.index chars char); + true + with Not_found -> false)) + let regexp ident_decoration = '\'' | '?' | '`' let regexp ident_cont = ident_letter | xml_digit | '_' let regexp ident = ident_letter ident_cont* ident_decoration* @@ -64,9 +74,10 @@ let regexp meta_anonymous = "$_" let regexp qstring = '"' [^ '"']* '"' let regexp begincomment = "(**" xml_blank +let regexp beginnote = "(*" let regexp endcomment = "*)" -let regexp comment_char = [^'*'] | '*'[^')'] -let regexp note = "(*" ([^'*'] | "**") comment_char* "*)" +(* let regexp comment_char = [^'*'] | '*'[^')'] +let regexp note = "|+" ([^'*'] | "**") comment_char* "+|" *) let level1_layouts = [ "sub"; "sup"; @@ -96,7 +107,7 @@ let level2_ast_keywords = Hashtbl.create 23 let _ = List.iter (fun k -> Hashtbl.add level2_ast_keywords k ()) [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match"; - "with"; "in"; "and"; "to"; "as"; "on" ] + "with"; "in"; "and"; "to"; "as"; "on"; "return" ] let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k () let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k @@ -112,9 +123,12 @@ let _ = ("<>", <:unicode>); (":=", <:unicode>); ] +let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ]+ + let regexp uri = ("cic:/" | "theory:/") (* schema *) - ident ('/' ident)* (* path *) +(* ident ('/' ident)* |+ path +| *) + uri_step ('/' uri_step)* (* path *) ('.' ident)+ (* ext *) ("#xpointer(" number ('/' number)+ ")")? (* xpointer *) @@ -230,17 +244,30 @@ let rec level2_meta_token = remove_left_quote (Ulexing.utf8_lexeme lexbuf)) | eof -> return_eoi lexbuf +let rec comment_token acc depth = + lexer + | beginnote -> + let acc = acc ^ Ulexing.utf8_lexeme lexbuf in + comment_token acc (depth + 1) lexbuf + | endcomment -> + let acc = acc ^ Ulexing.utf8_lexeme lexbuf in + if depth = 0 + then acc + else comment_token acc (depth - 1) lexbuf + | _ -> + let acc = acc ^ Ulexing.utf8_lexeme lexbuf in + comment_token acc depth lexbuf + (** @param k continuation to be invoked when no ligature has been found *) let rec ligatures_token k = lexer | ligature -> let lexeme = Ulexing.utf8_lexeme lexbuf in - (match Hashtbl.find_all ligatures lexeme with + (match List.rev (Hashtbl.find_all ligatures lexeme) with | [] -> (* ligature not found, rollback and try default lexer *) Ulexing.rollback lexbuf; k lexbuf - | ligs -> (* ligatures found, use the default one *) - let default_lig = List.hd (List.rev ligs) in + | default_lig :: _ -> (* ligatures found, use the default one *) return_symbol lexbuf default_lig) | eof -> return_eoi lexbuf | _ -> (* not a ligature, rollback and try default lexer *) @@ -274,11 +301,13 @@ and level2_ast_token = return lexbuf ("UNPARSED_META", remove_left_quote (Ulexing.utf8_lexeme lexbuf)) | meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous") - | note -> - let comment = + | beginnote -> + let comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in +(* let comment = Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4) in - return lexbuf ("NOTE", comment) + return lexbuf ("NOTE", comment) *) + ligatures_token level2_ast_token lexbuf | begincomment -> return lexbuf ("BEGINCOMMENT","") | endcomment -> return lexbuf ("ENDCOMMENT","") | eof -> return_eoi lexbuf @@ -313,3 +342,10 @@ let level1_pattern_lexer = mk_lexer level1_pattern_token let level2_ast_lexer = mk_lexer level2_ast_token let level2_meta_lexer = mk_lexer level2_meta_token +let lookup_ligatures lexeme = + try + if lexeme.[0] = '\\' + then [ Utf8Macro.expand (String.sub lexeme 1 (String.length lexeme - 1)) ] + else List.rev (Hashtbl.find_all ligatures lexeme) + with Invalid_argument _ | Utf8Macro.Macro_not_found _ as exn -> [] +