X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;ds=sidebyside;f=helm%2Fsoftware%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=4221417a0183dbf19a2e54cf140a802fdaad9ba4;hb=3e1e59644a24ed855a7f21bf9eab76f96577fd17;hp=8848a3ce5045b4648e1d3f8b49fe5db27444e931;hpb=55b82bd235d82ff7f0a40d980effe1efde1f5073;p=helm.git

diff --git a/helm/software/components/content_pres/cicNotationLexer.ml b/helm/software/components/content_pres/cicNotationLexer.ml
index 8848a3ce5..4221417a0 100644
--- a/helm/software/components/content_pres/cicNotationLexer.ml
+++ b/helm/software/components/content_pres/cicNotationLexer.ml
@@ -30,6 +30,12 @@ open Printf
 exception Error of int * int * string
 
 let regexp number = xml_digit+
+let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *)
+let regexp floatwithunit = 
+  [ '0' - '9' ] + ["."] [ '0' - '9' ] + ([ 'a' - 'z' ] + | "" )
+let regexp color = "#" [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f'
+'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
+[ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
 
   (* ZACK: breaks unicode's binder followed by an ascii letter without blank *)
 (* let regexp ident_letter = xml_letter *)
@@ -49,6 +55,10 @@ let is_ligature_char =
       true
     with Not_found -> false))
 
+let regexp we_proved = "we" utf8_blank+ "proved"
+let regexp we_have = "we" utf8_blank+ "have"
+let regexp let_rec = "let" utf8_blank+ "rec" 
+let regexp let_corec = "let" utf8_blank+  "corec"
 let regexp ident_decoration = '\'' | '?' | '`'
 let regexp ident_cont = ident_letter | xml_digit | '_'
 let regexp ident = ident_letter ident_cont* ident_decoration*
@@ -75,7 +85,7 @@ let regexp meta_ident = "$" ident
 let regexp meta_anonymous = "$_"
 let regexp qstring = '"' [^ '"']* '"'
 
-let regexp begincomment = "(**" xml_blank
+let regexp begincomment = "(**" utf8_blank
 let regexp beginnote = "(*"
 let regexp endcomment = "*)"
 (* let regexp comment_char = [^'*'] | '*'[^')']
@@ -93,7 +103,7 @@ let level1_keywords =
     "break";
     "list0"; "list1"; "sep";
     "opt";
-    "term"; "ident"; "number"
+    "term"; "ident"; "number"; "mstyle"
   ] @ level1_layouts
 
 let level2_meta_keywords =
@@ -108,8 +118,8 @@ let level2_meta_keywords =
 let level2_ast_keywords = Hashtbl.create 23
 let _ =
   List.iter (fun k -> Hashtbl.add level2_ast_keywords k ())
-  [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match";
-    "with"; "in"; "and"; "to"; "as"; "on"; "return" ]
+  [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
+  "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
 
 let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
 let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
@@ -123,9 +133,10 @@ let _ =
     [ ("->", <:unicode<to>>);   ("=>", <:unicode<Rightarrow>>);
       ("<=", <:unicode<leq>>);  (">=", <:unicode<geq>>);
       ("<>", <:unicode<neq>>);  (":=", <:unicode<def>>);
+      ("==", <:unicode<equiv>>);
     ]
 
-let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ]+
+let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
 
 let regexp uri =
   ("cic:/" | "theory:/")              (* schema *)
@@ -142,14 +153,8 @@ let error_at_end lexbuf msg =
   raise (Error (begin_cnum, end_cnum, msg))
 
 let return_with_loc token begin_cnum end_cnum =
-  (* TODO handle line/column numbers *)
-  let flocation_begin =
-    { Lexing.pos_fname = "";
-      Lexing.pos_lnum = -1; Lexing.pos_bol = -1;
-      Lexing.pos_cnum = begin_cnum }
-  in
-  let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in
-  (token, (flocation_begin, flocation_end))
+  let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in
+   token, flocation
 
 let return lexbuf token =
   let begin_cnum, end_cnum = Ulexing.loc lexbuf in
@@ -167,17 +172,17 @@ let mk_lexer token =
 (*     let lexbuf = Ulexing.from_utf8_stream stream in *)
 (** XXX Obj.magic rationale.
  * The problem.
- *  camlp4 constraints the tok_func field of Token.glexer to have type:
+ *  camlp5 constrains the tok_func field of Token.glexer to have type:
  *    Stream.t char -> (Stream.t 'te * flocation_function)
  *  In order to use ulex we have (in theory) to instantiate a new lexbuf each
  *  time a char Stream.t is passed, destroying the previous lexbuf which may
  *  have consumed a character from the old stream which is lost forever :-(
  * The "solution".
- *  Instead of passing to camlp4 a char Stream.t we pass a lexbuf, casting it to
+ *  Instead of passing to camlp5 a char Stream.t we pass a lexbuf, casting it to
  *  char Stream.t with Obj.magic where needed.
  *)
     let lexbuf = Obj.magic stream in
-    Token.make_stream_and_flocation
+    Token.make_stream_and_location
       (fun () ->
         try
           token lexbuf
@@ -228,7 +233,7 @@ let read_unparsed_group token_name lexbuf =
 
 let rec level2_meta_token =
   lexer
-  | xml_blank+ -> level2_meta_token lexbuf
+  | utf8_blank+ -> level2_meta_token lexbuf
   | ident ->
       let s = Ulexing.utf8_lexeme lexbuf in
 	begin
@@ -278,8 +283,14 @@ let rec ligatures_token k =
 
 and level2_ast_token =
   lexer
-  | xml_blank+ -> ligatures_token level2_ast_token lexbuf
-  | meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf)
+  | let_rec -> return lexbuf ("LETREC","")
+  | let_corec -> return lexbuf ("LETCOREC","")
+  | we_proved -> return lexbuf ("WEPROVED","")
+  | we_have -> return lexbuf ("WEHAVE","")
+  | utf8_blank+ -> ligatures_token level2_ast_token lexbuf
+  | meta ->
+     let s = Ulexing.utf8_lexeme lexbuf in
+      return lexbuf ("META", String.sub s 1 (String.length s - 1))
   | implicit -> return lexbuf ("IMPLICIT", "")
   | placeholder -> return lexbuf ("PLACEHOLDER", "")
   | ident ->
@@ -317,7 +328,7 @@ and level2_ast_token =
 
 and level1_pattern_token =
   lexer
-  | xml_blank+ -> ligatures_token level1_pattern_token lexbuf
+  | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
   | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
   | ident ->
       let s = Ulexing.utf8_lexeme lexbuf in
@@ -327,6 +338,9 @@ and level1_pattern_token =
 	  else
 	    return lexbuf ("IDENT", s)
 	end
+  | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
+  | floatwithunit -> 
+      return lexbuf ("FLOATWITHUNIT", Ulexing.utf8_lexeme lexbuf)
   | tex_token -> return lexbuf (expand_macro lexbuf)
   | qkeyword ->
       return lexbuf ("QKEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf))