Keeping track of locations of disambiguated ids and symbols.

[helm.git] / matitaB / components / content_pres / cicNotationLexer.ml
diff --git a/matitaB/components/content_pres/cicNotationLexer.ml b/matitaB/components/content_pres/cicNotationLexer.ml

index eaabf61e7a483d684bf327d912e84c33098507b3..524575d0387d6a48430d597079cfb3cd47154d47 100644 (file)
--- a/matitaB/components/content_pres/cicNotationLexer.ml
+++ b/matitaB/components/content_pres/cicNotationLexer.ml
@@ -62,6 +62,20 @@ let regexp ident = ident_letter ident_cont* ident_decoration*
  let regexp variable_ident = '_' '_' number
  let regexp pident = '_' ident
  
+let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
+
+let regexp uri =
+  ("cic:/" | "theory:/")              (* schema *)
+(*   ident ('/' ident)*                  |+ path +| *)
+  uri_step ('/' uri_step)*            (* path *)
+  ('.' ident)+                        (* ext *)
+(*  ("#xpointer(" number ('/' number)+ ")")?  (* xpointer *) *)
+  ("(" number (',' number)* ")")?  (* reference spec *)
+
+let regexp qstring = '"' [^ '"']* '"'
+let regexp hreftag = "<A href=\"" uri "\">" 
+let regexp hrefclose = "</A>"
+
  let regexp tex_token = '\\' ident
  
  let regexp delim_begin = "\\["
@@ -82,7 +96,6 @@ let regexp ast_ident = "@" ident
  let regexp ast_csymbol = "@" csymbol
  let regexp meta_ident = "$" ident
  let regexp meta_anonymous = "$_"
-let regexp qstring = '"' [^ '"']* '"'
  
  let regexp begincomment = "(**" utf8_blank
  let regexp beginnote = "(*"
@@ -125,15 +138,6 @@ let _ =
        (":=", <:unicode<def>>);
      ]
  
-let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
-
-let regexp uri =
-  ("cic:/" | "theory:/")              (* schema *)
-(*   ident ('/' ident)*                  |+ path +| *)
-  uri_step ('/' uri_step)*            (* path *)
-  ('.' ident)+                        (* ext *)
-  ("#xpointer(" number ('/' number)+ ")")?  (* xpointer *)
-
  let regexp nreference =
    "cic:/"                             (* schema *)
    uri_step ('/' uri_step)*            (* path *)
@@ -241,9 +245,15 @@ let handle_keywords lexbuf k name =
             return lexbuf (name, s)
  ;;
  
+let get_uri buf =
+  Ulexing.utf8_sub_lexeme buf 9 (Ulexing.lexeme_length buf - 11)
+;;
+
  let rec level2_meta_token =
    lexer
    | utf8_blank+ -> level2_meta_token lexbuf
+  | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf)
+  | hrefclose -> return lexbuf ("HREFEND","")
    | ident ->
        handle_keywords lexbuf (fun x -> List.mem x level2_meta_keywords) "IDENT"
    | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
@@ -300,6 +310,8 @@ let rec level2_ast_token status =
        return lexbuf ("META", String.sub s 1 (String.length s - 1))
    | implicit -> return lexbuf ("IMPLICIT", "")
    | placeholder -> return lexbuf ("PLACEHOLDER", "")
+  | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf)
+  | hrefclose -> return lexbuf ("HREFEND","")
    | ident -> handle_keywords lexbuf (fun x -> StringSet.mem x status) "IDENT"
    | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
    | pident -> handle_keywords lexbuf (fun x -> StringSet.mem x status) "PIDENT"
@@ -335,6 +347,8 @@ and level1_pattern_token =
    lexer
    | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
    | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
+  | hreftag -> prerr_endline ("****** HREF : " ^ (Ulexing.utf8_lexeme lexbuf));return lexbuf ("HREF", get_uri lexbuf)
+  | hrefclose -> return lexbuf ("HREFEND","")
    | ident ->handle_keywords lexbuf (fun x -> List.mem x level1_keywords) "IDENT"
    | variable_ident -> return lexbuf ("IDENT", Ulexing.utf8_lexeme lexbuf)
    | pident->handle_keywords lexbuf (fun x->List.mem x level1_keywords) "PIDENT"