X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=matitaB%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=f85d2e776c588cf84513dce614d13c3deca229a3;hb=b8a57655cd700e4c4513a71fa592eb4b982febf7;hp=ce8fcec54b9fba727c1f97c40c6c203029946f72;hpb=2914bfbeac3c2e0f53ba8c612cd11b3b2afbabce;p=helm.git diff --git a/matitaB/components/content_pres/cicNotationLexer.ml b/matitaB/components/content_pres/cicNotationLexer.ml index ce8fcec54..f85d2e776 100644 --- a/matitaB/components/content_pres/cicNotationLexer.ml +++ b/matitaB/components/content_pres/cicNotationLexer.ml @@ -73,10 +73,22 @@ let regexp uri = ("(" number (',' number)* ")")? (* reference spec *) let regexp qstring = '"' [^ '"']* '"' -let regexp hreftag = "<" [ 'A' 'a' ] +let regexp xmarkup = '\005' +let regexp ymarkup = '\006' + +let regexp hreftag = xmarkup [ 'A' 'a' ] let regexp href = "href=\"" uri "\"" let regexp hreftitle = "title=" qstring -let regexp hrefclose = "" +let regexp hrefclose = xmarkup "/" [ 'A' 'a' ] ymarkup + +let regexp tag_cont = ident_letter | xml_digit | "_" | "-" +let regexp gtagname = [ 'B' - 'Z' 'b' - 'z' ] | ident_letter tag_cont+ +let regexp attrname = ident_letter tag_cont* +let regexp gopentag = xmarkup gtagname +let regexp closetag = xmarkup "/" gtagname ymarkup +let regexp attribute = attrname "=" qstring +let regexp generictag = + gopentag (utf8_blank+ attribute)* ymarkup let regexp tex_token = '\\' ident @@ -255,6 +267,8 @@ let rec level2_meta_token = | utf8_blank+ -> level2_meta_token lexbuf | hreftag -> return lexbuf ("ATAG","") | hrefclose -> return lexbuf ("ATAGEND","") + | generictag + | closetag -> level2_meta_token lexbuf | ident -> handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) @@ -316,41 +330,52 @@ let update_table loc desc href loctable = if desc <> None || href <> None then (let s,e = HExtlib.loc_of_floc loc in + prerr_endline (Printf.sprintf "*** [%d,%d] \"%s\",\"%s\"" s e (so_pp href) (so_pp desc)); + + Printf.printf "*** [%d,%d] \"%s\",\"%s\"" + s e (so_pp href) (so_pp desc); LocalizeEnv.add loc (href,desc) loctable) else loctable ;; let level2_ast_token loctable status = - let rec aux desc href = + let rec aux desc href in_tag = lexer | let_rec -> return lexbuf ("LETREC","") | let_corec -> return lexbuf ("LETCOREC","") | we_proved -> return lexbuf ("WEPROVED","") | we_have -> return lexbuf ("WEHAVE","") - | utf8_blank+ -> ligatures_token (aux desc href) lexbuf + | utf8_blank+ -> ligatures_token (aux desc href in_tag) lexbuf | meta -> let s = Ulexing.utf8_lexeme lexbuf in return lexbuf ("META", String.sub s 1 (String.length s - 1)) | implicit -> return lexbuf ("IMPLICIT", "") | placeholder -> return lexbuf ("PLACEHOLDER", "") - | hreftag -> aux_in_tag None None lexbuf - | hrefclose -> aux None None lexbuf - | ident -> loctable := - update_table (loc_of_buf lexbuf) desc href !loctable; - handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" + | hreftag -> aux_attr None None lexbuf + | hrefclose -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) + (* ignore other tags *) + | generictag + | closetag -> ligatures_token (aux desc href in_tag) lexbuf + | ident -> if in_tag then + aux_close_tag desc href ("IDENT", Ulexing.utf8_lexeme lexbuf) lexbuf + else handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) - | pident -> loctable := - update_table (loc_of_buf lexbuf) desc href !loctable; - handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" - | number -> loctable := - update_table (loc_of_buf lexbuf) desc href !loctable; - return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) - | tex_token -> loctable := - update_table (loc_of_buf lexbuf) desc href !loctable; - return lexbuf (expand_macro lexbuf) + | pident -> if in_tag then + aux_close_tag desc href ("PIDENT", Ulexing.utf8_lexeme lexbuf) lexbuf + else handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" + | number -> let token = "NUMBER", Ulexing.utf8_lexeme lexbuf + in + if in_tag then + aux_close_tag desc href token lexbuf + else return lexbuf token + | tex_token -> let token = expand_macro lexbuf + in + if in_tag then + aux_close_tag desc href token lexbuf + else return lexbuf token | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf) | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf) | qstring -> @@ -371,27 +396,53 @@ let level2_ast_token loctable status = Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4) in return lexbuf ("NOTE", comment) *) - ligatures_token (aux desc href) lexbuf + ligatures_token (aux desc href in_tag) lexbuf | begincomment -> return lexbuf ("BEGINCOMMENT","") | endcomment -> return lexbuf ("ENDCOMMENT","") | eof -> return_eoi lexbuf - | _ -> loctable := - update_table (loc_of_buf lexbuf) desc href !loctable; - return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) - - and aux_in_tag desc href = lexer - | utf8_blank+ -> ligatures_token (aux_in_tag desc href) lexbuf + | _ -> let token = "SYMBOL", (Ulexing.utf8_lexeme lexbuf) + in + if in_tag then + aux_close_tag desc href token lexbuf + else return lexbuf token + + and aux_attr desc href = lexer + | utf8_blank+ -> ligatures_token (aux_attr desc href) lexbuf | href -> - aux_in_tag desc + aux_attr desc (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7))) lexbuf | hreftitle -> - aux_in_tag + aux_attr (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8))) href lexbuf - | ">" -> aux desc href lexbuf - | _ -> aux None None lexbuf - in aux None None + | ymarkup -> aux desc href true lexbuf + | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) + +(* and aux_in_tag desc href = lexer + | ident -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT" + | variable_ident -> + return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) + | pident -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT" + | number -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) + | tex_token -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + return lexbuf (expand_macro lexbuf) + | _ -> loctable := + update_table (loc_of_buf lexbuf) desc href !loctable; + return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf) + *) + and aux_close_tag desc href token = lexer + | hrefclose -> loctable := update_table (loc_of_buf lexbuf) desc href !loctable; + return lexbuf token + | _ -> return lexbuf ("LEXING_ERROR", Ulexing.utf8_lexeme lexbuf) + in aux None None false let rec level1_pattern_token = lexer @@ -399,7 +450,9 @@ let rec level1_pattern_token = | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) | hreftag -> return lexbuf ("ATAG", "") | hrefclose -> return lexbuf ("ATAGEND","") - | ident ->handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT" + | generictag + | closetag -> ligatures_token level1_pattern_token lexbuf + | ident -> handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT" | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf) | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT" | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)