X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=matitaB%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;fp=matitaB%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=3c84e0bb5056f94e1f2e9bd138f4185c32b609e3;hb=ae6757199708cc32166961debb52d48114c0eb74;hp=f85d2e776c588cf84513dce614d13c3deca229a3;hpb=b40e4e96e85103c7072985990c6b541371fd5a48;p=helm.git diff --git a/matitaB/components/content_pres/cicNotationLexer.ml b/matitaB/components/content_pres/cicNotationLexer.ml index f85d2e776..3c84e0bb5 100644 --- a/matitaB/components/content_pres/cicNotationLexer.ml +++ b/matitaB/components/content_pres/cicNotationLexer.ml @@ -254,8 +254,9 @@ let read_unparsed_group token_name lexbuf = let end_cnum = level2_pattern_token_group 0 buffer lexbuf in return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum -let handle_keywords lexbuf k name = +let handle_keywords lexbuf k name = let s = Ulexing.utf8_lexeme lexbuf in + prerr_endline (Printf.sprintf "handling \"%s\" as a keyword or %s?" s name); if k s then return lexbuf ("", s) else @@ -341,40 +342,51 @@ let update_table loc desc href loctable = ;; let level2_ast_token loctable status = - let rec aux desc href in_tag = + prerr_endline ("X keywords = " ^ (String.concat ", " + (StringSet.elements status))); + (* start = starting point of last open A tag (default -1 = no tag) *) + let rec aux desc href start = lexer | let_rec -> return lexbuf ("LETREC","") | let_corec -> return lexbuf ("LETCOREC","") | we_proved -> return lexbuf ("WEPROVED","") | we_have -> return lexbuf ("WEHAVE","") - | utf8_blank+ -> ligatures_token (aux desc href in_tag) lexbuf + | utf8_blank+ -> ligatures_token (aux desc href start) lexbuf | meta -> let s = Ulexing.utf8_lexeme lexbuf in return lexbuf ("META", String.sub s 1 (String.length s - 1)) | implicit -> return lexbuf ("IMPLICIT", "") | placeholder -> return lexbuf ("PLACEHOLDER", "") - | hreftag -> aux_attr None None lexbuf + | hreftag -> + let start = Ulexing.lexeme_start lexbuf in + aux_attr None None start lexbuf | hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) (* ignore other tags *) | generictag - | closetag -> ligatures_token (aux desc href in_tag) lexbuf - | ident -> if in_tag then - aux_close_tag desc href ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf - else handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" + | closetag -> ligatures_token (aux desc href start) lexbuf + | ident -> + if start <> ~-1 then + let idtok,_ = + handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" + in aux_close_tag desc href start idtok lexbuf +(* ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf *) + else + handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) - | pident -> if in_tag then - aux_close_tag desc href ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf - else handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" + | pident -> if start <> ~-1 then + aux_close_tag desc href start ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf + else + handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" | number -> let token = "NUMBER", Ulexing.utf8_lexeme lexbuf in - if in_tag then - aux_close_tag desc href token lexbuf + if start <> ~-1 then + aux_close_tag desc href start token lexbuf else return lexbuf token | tex_token -> let token = expand_macro lexbuf in - if in_tag then - aux_close_tag desc href token lexbuf + if start <> ~-1 then + aux_close_tag desc href start token lexbuf else return lexbuf token | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf) | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf) @@ -396,27 +408,27 @@ let level2_ast_token loctable status = Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4) in return lexbuf ("NOTE", comment) *) - ligatures_token (aux desc href in_tag) lexbuf + ligatures_token (aux desc href start) lexbuf | begincomment -> return lexbuf ("BEGINCOMMENT","") | endcomment -> return lexbuf ("ENDCOMMENT","") | eof -> return_eoi lexbuf | _ -> let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf) in - if in_tag then - aux_close_tag desc href token lexbuf + if start <> ~-1 then + aux_close_tag desc href start token lexbuf else return lexbuf token - and aux_attr desc href = lexer - | utf8_blank+ -> ligatures_token (aux_attr desc href) lexbuf + and aux_attr desc href start = lexer + | utf8_blank+ -> ligatures_token (aux_attr desc href start) lexbuf | href -> aux_attr desc (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7))) - lexbuf + start lexbuf | hreftitle -> aux_attr (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8))) - href lexbuf - | ymarkup -> aux desc href true lexbuf + href start lexbuf + | ymarkup -> aux desc href start lexbuf | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) (* and aux_in_tag desc href = lexer @@ -438,11 +450,14 @@ let level2_ast_token loctable status = update_table (loc_of_buf lexbuf) desc href !loctable; return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) *) - and aux_close_tag desc href token = lexer - | hrefclose -> loctable := update_table (loc_of_buf lexbuf) desc href !loctable; - return lexbuf token + and aux_close_tag desc href start token = lexer + | hrefclose -> let _,b = Ulexing.loc lexbuf in + loctable := update_table (HExtlib.floc_of_loc (start,b)) desc href !loctable; + prerr_endline + (Printf.sprintf "adding loc (%d,%d) to table" start b); + return_with_loc token start b | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) - in aux None None false + in aux None None ~-1 let rec level1_pattern_token = lexer @@ -454,7 +469,7 @@ let rec level1_pattern_token = | closetag -> ligatures_token level1_pattern_token lexbuf | ident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) - | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT" + | pident-> handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT" | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf) | percentage -> return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf) @@ -476,20 +491,22 @@ let level2_ast_token loctable status = type lexers = { level1_pattern_lexer : (string * string) Token.glexer; level2_ast_lexer : (string * string) Token.glexer; - level2_meta_lexer : (string * string) Token.glexer + level2_meta_lexer : (string * string) Token.glexer; } -let mk_lexers loctable keywords = - let initial_keywords = +let initial_keywords = [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match"; "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ] - in +;; + +let mk_lexers loctable keywords = + prerr_endline ("mk_lexers keywords: " ^ (String.concat ", " keywords)); let status = List.fold_right StringSet.add (initial_keywords @ keywords) StringSet.empty in { level1_pattern_lexer = mk_lexer level1_pattern_token; level2_ast_lexer = mk_lexer (level2_ast_token loctable status); - level2_meta_lexer = mk_lexer level2_meta_token + level2_meta_lexer = mk_lexer level2_meta_token; } ;;