("(" number (',' number)* ")")? (* reference spec *)
let regexp qstring = '"' [^ '"']* '"'
-let regexp hreftag = "<A href=\"" uri "\">"
-let regexp hrefclose = "</A>"
+let regexp xmarkup = '\005'
+let regexp ymarkup = '\006'
+
+let regexp hreftag = xmarkup [ 'A' 'a' ]
+let regexp href = "href=\"" uri "\""
+let regexp hreftitle = "title=" qstring
+let regexp hrefclose = xmarkup "/" [ 'A' 'a' ] ymarkup
+
+let regexp tag_cont = ident_letter | xml_digit | "_" | "-"
+let regexp tagname = ident_letter tag_cont*
+let regexp opentag = xmarkup tagname
+let regexp closetag = xmarkup "/" tagname ymarkup
+let regexp attribute = tagname "=" qstring
+let regexp generictag =
+ opentag (utf8_blank+ attribute)* ymarkup
let regexp tex_token = '\\' ident
let begin_cnum, end_cnum = Ulexing.loc lexbuf in
raise (Error (begin_cnum, end_cnum, msg))
+let loc_of_buf lexbuf =
+ HExtlib.floc_of_loc (Ulexing.loc lexbuf)
+
let return_with_loc token begin_cnum end_cnum =
let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in
token, flocation
return lexbuf (name, s)
;;
-let get_uri buf =
- Ulexing.utf8_sub_lexeme buf 9 (Ulexing.lexeme_length buf - 11)
-;;
-
let rec level2_meta_token =
lexer
| utf8_blank+ -> level2_meta_token lexbuf
- | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf)
- | hrefclose -> return lexbuf ("HREFEND","")
+ | hreftag -> return lexbuf ("ATAG","")
+ | hrefclose -> return lexbuf ("ATAGEND","")
+ | generictag
+ | closetag -> level2_meta_token lexbuf
| ident ->
handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT"
| variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
Ulexing.rollback lexbuf;
k lexbuf
-let rec level2_ast_token status =
- lexer
- | let_rec -> return lexbuf ("LETREC","")
- | let_corec -> return lexbuf ("LETCOREC","")
- | we_proved -> return lexbuf ("WEPROVED","")
- | we_have -> return lexbuf ("WEHAVE","")
- | utf8_blank+ -> ligatures_token (level2_ast_token status) lexbuf
- | meta ->
- let s = Ulexing.utf8_lexeme lexbuf in
- return lexbuf ("META", String.sub s 1 (String.length s - 1))
- | implicit -> return lexbuf ("IMPLICIT", "")
- | placeholder -> return lexbuf ("PLACEHOLDER", "")
- | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf)
- | hrefclose -> return lexbuf ("HREFEND","")
- | ident -> handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
- | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
- | pident -> handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
- | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
- | tex_token -> return lexbuf (expand_macro lexbuf)
- | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
- | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
- | qstring ->
- return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf))
- | csymbol ->
- return lexbuf ("CSYMBOL", remove_left_quote (Ulexing.utf8_lexeme lexbuf))
- | "${" -> read_unparsed_group "UNPARSED_META" lexbuf
- | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf
- | '(' -> return lexbuf ("LPAREN", "")
- | ')' -> return lexbuf ("RPAREN", "")
- | meta_ident ->
- return lexbuf ("UNPARSED_META",
- remove_left_quote (Ulexing.utf8_lexeme lexbuf))
- | meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous")
- | beginnote ->
- let _comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in
-(* let comment =
- Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
- in
- return lexbuf ("NOTE", comment) *)
- ligatures_token (level2_ast_token status) lexbuf
- | begincomment -> return lexbuf ("BEGINCOMMENT","")
- | endcomment -> return lexbuf ("ENDCOMMENT","")
- | eof -> return_eoi lexbuf
- | _ -> return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
+module LocalizeOD =
+ struct
+ type t = Stdpp.location
+ let compare = Pervasives.compare
+ end
+
+module LocalizeEnv = Map.Make (LocalizeOD)
+
+let so_pp = function
+ | None -> "()"
+ | Some s -> "*" ^ s
+;;
+
+let update_table loc desc href loctable =
+ if desc <> None || href <> None
+ then
+ (let s,e = HExtlib.loc_of_floc loc in
+ prerr_endline (Printf.sprintf "*** [%d,%d] \"%s\",\"%s\""
+ s e (so_pp href) (so_pp desc));
+ LocalizeEnv.add loc (href,desc) loctable)
+ else loctable
+;;
-and level1_pattern_token =
+let level2_ast_token loctable status =
+ let rec aux desc href =
+ lexer
+ | let_rec -> return lexbuf ("LETREC","")
+ | let_corec -> return lexbuf ("LETCOREC","")
+ | we_proved -> return lexbuf ("WEPROVED","")
+ | we_have -> return lexbuf ("WEHAVE","")
+ | utf8_blank+ -> ligatures_token (aux desc href) lexbuf
+ | meta ->
+ let s = Ulexing.utf8_lexeme lexbuf in
+ return lexbuf ("META", String.sub s 1 (String.length s - 1))
+ | implicit -> return lexbuf ("IMPLICIT", "")
+ | placeholder -> return lexbuf ("PLACEHOLDER", "")
+ | hreftag -> aux_in_tag None None lexbuf
+ | hrefclose -> aux None None lexbuf
+ | generictag
+ | closetag -> ligatures_token (aux desc href) lexbuf
+ | ident -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
+ | variable_ident ->
+ return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
+ | pident -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
+ | number -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
+ | tex_token -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ return lexbuf (expand_macro lexbuf)
+ | nreference -> return lexbuf ("NREF", Ulexing.utf8_lexeme lexbuf)
+ | uri -> return lexbuf ("URI", Ulexing.utf8_lexeme lexbuf)
+ | qstring ->
+ return lexbuf ("QSTRING", remove_quotes (Ulexing.utf8_lexeme lexbuf))
+ | csymbol ->
+ return lexbuf ("CSYMBOL", remove_left_quote (Ulexing.utf8_lexeme lexbuf))
+ | "${" -> read_unparsed_group "UNPARSED_META" lexbuf
+ | "@{" -> read_unparsed_group "UNPARSED_AST" lexbuf
+ | '(' -> return lexbuf ("LPAREN", "")
+ | ')' -> return lexbuf ("RPAREN", "")
+ | meta_ident ->
+ return lexbuf ("UNPARSED_META",
+ remove_left_quote (Ulexing.utf8_lexeme lexbuf))
+ | meta_anonymous -> return lexbuf ("UNPARSED_META", "anonymous")
+ | beginnote ->
+ let _comment = comment_token (Ulexing.utf8_lexeme lexbuf) 0 lexbuf in
+ (* let comment =
+ Ulexing.utf8_sub_lexeme lexbuf 2 (Ulexing.lexeme_length lexbuf - 4)
+ in
+ return lexbuf ("NOTE", comment) *)
+ ligatures_token (aux desc href) lexbuf
+ | begincomment -> return lexbuf ("BEGINCOMMENT","")
+ | endcomment -> return lexbuf ("ENDCOMMENT","")
+ | eof -> return_eoi lexbuf
+ | _ -> loctable :=
+ update_table (loc_of_buf lexbuf) desc href !loctable;
+ return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
+
+ and aux_in_tag desc href = lexer
+ | utf8_blank+ -> ligatures_token (aux_in_tag desc href) lexbuf
+ | href ->
+ aux_in_tag desc
+ (Some (Ulexing.utf8_sub_lexeme lexbuf 6 (Ulexing.lexeme_length lexbuf - 7)))
+ lexbuf
+ | hreftitle ->
+ aux_in_tag
+ (Some (Ulexing.utf8_sub_lexeme lexbuf 7 (Ulexing.lexeme_length lexbuf - 8)))
+ href lexbuf
+ | ">" -> aux desc href lexbuf
+ | _ -> aux None None lexbuf
+ in aux None None
+
+let rec level1_pattern_token =
lexer
| utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
- | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf)
- | hrefclose -> return lexbuf ("HREFEND","")
+ | hreftag -> return lexbuf ("ATAG", "")
+ | hrefclose -> return lexbuf ("ATAGEND","")
+ | generictag
+ | closetag -> ligatures_token level1_pattern_token lexbuf
| ident ->handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
| variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
| pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"
| _ -> return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
let level1_pattern_token = ligatures_token level1_pattern_token
-let level2_ast_token status = ligatures_token (level2_ast_token status)
+let level2_ast_token loctable status =
+ ligatures_token (level2_ast_token loctable status)
(* API implementation *)
type lexers = {
level2_meta_lexer : (string * string) Token.glexer
}
-let mk_lexers keywords =
+let mk_lexers loctable keywords =
let initial_keywords =
[ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
"with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
in
{
level1_pattern_lexer = mk_lexer level1_pattern_token;
- level2_ast_lexer = mk_lexer (level2_ast_token status);
+ level2_ast_lexer = mk_lexer (level2_ast_token loctable status);
level2_meta_lexer = mk_lexer level2_meta_token
}
;;