X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=matitaB%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=f85d2e776c588cf84513dce614d13c3deca229a3;hb=0eb55693fa50c695866cf5205b04cdf9bf5e8e9d;hp=524575d0387d6a48430d597079cfb3cd47154d47;hpb=6d4277977478ef9bcadaffbef3d4bb04ac0250a5;p=helm.git diff --git a/matitaB/components/content_pres/cicNotationLexer.ml b/matitaB/components/content_pres/cicNotationLexer.ml index 524575d03..f85d2e776 100644 --- a/matitaB/components/content_pres/cicNotationLexer.ml +++ b/matitaB/components/content_pres/cicNotationLexer.ml @@ -73,8 +73,22 @@ let regexp uri = ("(" number (',' number)* ")")? (* reference spec *) let regexp qstring = '"' [^ '"']* '"' -let regexp hreftag = "" -let regexp hrefclose = "" +let regexp xmarkup = '\005' +let regexp ymarkup = '\006' + +let regexp hreftag = xmarkup [ 'A' 'a' ] +let regexp href = "href=\"" uri "\"" +let regexp hreftitle = "title=" qstring +let regexp hrefclose = xmarkup "/" [ 'A' 'a' ] ymarkup + +let regexp tag_cont = ident_letter | xml_digit | "_" | "-" +let regexp gtagname = [ 'B' - 'Z' 'b' - 'z' ] | ident_letter tag_cont+ +let regexp attrname = ident_letter tag_cont* +let regexp gopentag = xmarkup gtagname +let regexp closetag = xmarkup "/" gtagname ymarkup +let regexp attribute = attrname "=" qstring +let regexp generictag = + gopentag (utf8_blank+ attribute)* ymarkup let regexp tex_token = '\\' ident @@ -156,6 +170,9 @@ let error_at_end lexbuf msg = let begin_cnum, end_cnum = Ulexing.loc lexbuf in raise (Error (begin_cnum, end_cnum, msg)) +let loc_of_buf lexbuf = + HExtlib.floc_of_loc (Ulexing.loc lexbuf) + let return_with_loc token begin_cnum end_cnum = let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in token, flocation @@ -245,15 +262,13 @@ let handle_keywords lexbuf k name = return lexbuf (name, s) ;; -let get_uri buf = - Ulexing.utf8_sub_lexeme buf 9 (Ulexing.lexeme_length buf - 11) -;; - let rec level2_meta_token = lexer | utf8_blank+ -> level2_meta_token lexbuf - | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf) - | hrefclose -> return lexbuf ("HREFEND","") + | hreftag -> return lexbuf ("ATAG","") + | hrefclose -> return lexbuf ("ATAGEND","") + | generictag + | closetag -> level2_meta_token lexbuf | ident -> handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) @@ -298,58 +313,146 @@ let ligatures_token k = Ulexing.rollback lexbuf; k lexbuf -let rec level2_ast_token status = - lexer - | let_rec -> return lexbuf ("LETREC","") - | let_corec -> return lexbuf ("LETCOREC","") - | we_proved -> return lexbuf ("WEPROVED","") - | we_have -> return lexbuf ("WEHAVE","") - | utf8_blank+ -> ligatures_token (level2_ast_token status) lexbuf - | meta -> - let s = Ulexing.utf8_lexeme lexbuf in - return lexbuf ("META", String.sub s 1 (String.length s - 1)) - | implicit -> return lexbuf ("IMPLICIT", "") - | placeholder -> return lexbuf ("PLACEHOLDER", "") - | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf) - | hrefclose -> return lexbuf ("HREFEND","") - | ident -> handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" - | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) - | pident -> handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" - | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) - | tex_token -> return lexbuf (expand_macro lexbuf) - | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf) - | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf) - | qstring -> - return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf)) - | csymbol -> - return lexbuf ("CSYMBOL", remove_left_quote (Ulexing.utf8_lexeme lexbuf)) - | "${" -> read_unparsed_group "UNPARSED_META" lexbuf - | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf - | '(' -> return lexbuf ("LPAREN", "") - | ')' -> return lexbuf ("RPAREN", "") - | meta_ident -> - return lexbuf ("UNPARSED_META", - remove_left_quote (Ulexing.utf8_lexeme lexbuf)) - | meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous") - | beginnote -> - let _comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in -(* let comment = - Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4) - in - return lexbuf ("NOTE", comment) *) - ligatures_token (level2_ast_token status) lexbuf - | begincomment -> return lexbuf ("BEGINCOMMENT","") - | endcomment -> return lexbuf ("ENDCOMMENT","") - | eof -> return_eoi lexbuf - | _ -> return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) +module LocalizeOD = + struct + type t = Stdpp.location + let compare = Pervasives.compare + end + +module LocalizeEnv = Map.Make (LocalizeOD) + +let so_pp = function + | None -> "()" + | Some s -> "*" ^ s +;; + +let update_table loc desc href loctable = + if desc <> None || href <> None + then + (let s,e = HExtlib.loc_of_floc loc in + + prerr_endline (Printf.sprintf "*** [%d,%d] \"%s\",\"%s\"" + s e (so_pp href) (so_pp desc)); + + Printf.printf "*** [%d,%d] \"%s\",\"%s\"" + s e (so_pp href) (so_pp desc); + LocalizeEnv.add loc (href,desc) loctable) + else loctable +;; -and level1_pattern_token = +let level2_ast_token loctable status = + let rec aux desc href in_tag = + lexer + | let_rec -> return lexbuf ("LETREC","") + | let_corec -> return lexbuf ("LETCOREC","") + | we_proved -> return lexbuf ("WEPROVED","") + | we_have -> return lexbuf ("WEHAVE","") + | utf8_blank+ -> ligatures_token (aux desc href in_tag) lexbuf + | meta -> + let s = Ulexing.utf8_lexeme lexbuf in + return lexbuf ("META", String.sub s 1 (String.length s - 1)) + | implicit -> return lexbuf ("IMPLICIT", "") + | placeholder -> return lexbuf ("PLACEHOLDER", "") + | hreftag -> aux_attr None None lexbuf + | hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) + (* ignore other tags *) + | generictag + | closetag -> ligatures_token (aux desc href in_tag) lexbuf + | ident -> if in_tag then + aux_close_tag desc href ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf + else handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" + | variable_ident -> + return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) + | pident -> if in_tag then + aux_close_tag desc href ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf + else handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" + | number -> let token = "NUMBER", Ulexing.utf8_lexeme lexbuf + in + if in_tag then + aux_close_tag desc href token lexbuf + else return lexbuf token + | tex_token -> let token = expand_macro lexbuf + in + if in_tag then + aux_close_tag desc href token lexbuf + else return lexbuf token + | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf) + | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf) + | qstring -> + return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf)) + | csymbol -> + return lexbuf ("CSYMBOL", remove_left_quote (Ulexing.utf8_lexeme lexbuf)) + | "${" -> read_unparsed_group "UNPARSED_META" lexbuf + | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf + | '(' -> return lexbuf ("LPAREN", "") + | ')' -> return lexbuf ("RPAREN", "") + | meta_ident -> + return lexbuf ("UNPARSED_META", + remove_left_quote (Ulexing.utf8_lexeme lexbuf)) + | meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous") + | beginnote -> + let _comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in + (* let comment = + Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4) + in + return lexbuf ("NOTE", comment) *) + ligatures_token (aux desc href in_tag) lexbuf + | begincomment -> return lexbuf ("BEGINCOMMENT","") + | endcomment -> return lexbuf ("ENDCOMMENT","") + | eof -> return_eoi lexbuf + | _ -> let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf) + in + if in_tag then + aux_close_tag desc href token lexbuf + else return lexbuf token + + and aux_attr desc href = lexer + | utf8_blank+ -> ligatures_token (aux_attr desc href) lexbuf + | href -> + aux_attr desc + (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7))) + lexbuf + | hreftitle -> + aux_attr + (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8))) + href lexbuf + | ymarkup -> aux desc href true lexbuf + | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) + +(* and aux_in_tag desc href = lexer + | ident -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" + | variable_ident -> + return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) + | pident -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" + | number -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) + | tex_token -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + return lexbuf (expand_macro lexbuf) + | _ -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) + *) + and aux_close_tag desc href token = lexer + | hrefclose -> loctable := update_table (loc_of_buf lexbuf) desc href !loctable; + return lexbuf token + | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) + in aux None None false + +let rec level1_pattern_token = lexer | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) - | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf) - | hrefclose -> return lexbuf ("HREFEND","") - | ident ->handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT" + | hreftag -> return lexbuf ("ATAG", "") + | hrefclose -> return lexbuf ("ATAGEND","") + | generictag + | closetag -> ligatures_token level1_pattern_token lexbuf + | ident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT" | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf) @@ -366,7 +469,8 @@ and level1_pattern_token = | _ -> return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) let level1_pattern_token = ligatures_token level1_pattern_token -let level2_ast_token status = ligatures_token (level2_ast_token status) +let level2_ast_token loctable status = + ligatures_token (level2_ast_token loctable status) (* API implementation *) type lexers = { @@ -375,7 +479,7 @@ type lexers = { level2_meta_lexer : (string * string) Token.glexer } -let mk_lexers keywords = +let mk_lexers loctable keywords = let initial_keywords = [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match"; "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ] @@ -385,7 +489,7 @@ let mk_lexers keywords = in { level1_pattern_lexer = mk_lexer level1_pattern_token; - level2_ast_lexer = mk_lexer (level2_ast_token status); + level2_ast_lexer = mk_lexer (level2_ast_token loctable status); level2_meta_lexer = mk_lexer level2_meta_token } ;;