removing (only from the interface) functions related to ligatures that now live in...

[helm.git] / helm / software / components / content_pres / cicNotationLexer.ml
diff --git a/helm/software/components/content_pres/cicNotationLexer.ml b/helm/software/components/content_pres/cicNotationLexer.ml

index 67340d37a255662b1270a26ce757c18d82768de7..bbc901dab7db3e2f139e37e6e65639efe1d2abd4 100644 (file)
--- a/helm/software/components/content_pres/cicNotationLexer.ml
+++ b/helm/software/components/content_pres/cicNotationLexer.ml
@@ -30,6 +30,14 @@ open Printf
  exception Error of int * int * string
  
  let regexp number = xml_digit+
+let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *)
+let regexp percentage = 
+  ('-' | "") [ '0' - '9' ] + '%'
+let regexp floatwithunit = 
+  ('-' | "") [ '0' - '9' ] + ["."] [ '0' - '9' ] + ([ 'a' - 'z' ] + | "" )
+let regexp color = "#" [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f'
+'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
+[ '0' - '9' 'a' - 'f' 'A' - 'F' ] [ '0' - '9' 'a' - 'f' 'A' - 'F' ]
  
    (* ZACK: breaks unicode's binder followed by an ascii letter without blank *)
  (* let regexp ident_letter = xml_letter *)
@@ -40,15 +48,10 @@ let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ]
  let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
  let regexp ligature = ligature_char ligature_char+
  
-let is_ligature_char =
-  (* must be in sync with "regexp ligature_char" above *)
-  let chars = "'`~!?@*()[]<>-+=|:;.,/\"" in
-  (fun char ->
-    (try
-      ignore (String.index chars char);
-      true
-    with Not_found -> false))
-
+let regexp we_proved = "we" utf8_blank+ "proved"
+let regexp we_have = "we" utf8_blank+ "have"
+let regexp let_rec = "let" utf8_blank+ "rec" 
+let regexp let_corec = "let" utf8_blank+  "corec"
  let regexp ident_decoration = '\'' | '?' | '`'
  let regexp ident_cont = ident_letter | xml_digit | '_'
  let regexp ident = ident_letter ident_cont* ident_decoration*
@@ -75,7 +78,7 @@ let regexp meta_ident = "$" ident
  let regexp meta_anonymous = "$_"
  let regexp qstring = '"' [^ '"']* '"'
  
-let regexp begincomment = "(**" xml_blank
+let regexp begincomment = "(**" utf8_blank
  let regexp beginnote = "(*"
  let regexp endcomment = "*)"
  (* let regexp comment_char = [^'*'] | '*'[^')']
@@ -93,11 +96,11 @@ let level1_keywords =
      "break";
      "list0"; "list1"; "sep";
      "opt";
-    "term"; "ident"; "number"
+    "term"; "ident"; "number"; "mstyle" ; "mpadded"
    ] @ level1_layouts
  
  let level2_meta_keywords =
-  [ "if"; "then"; "else";
+  [ "if"; "then"; "elCicNotationParser.se";
      "fold"; "left"; "right"; "rec";
      "fail";
      "default";
@@ -105,27 +108,35 @@ let level2_meta_keywords =
    ]
  
    (* (string, unit) Hashtbl.t, to exploit multiple bindings *)
-let level2_ast_keywords = Hashtbl.create 23
-let _ =
-  List.iter (fun k -> Hashtbl.add level2_ast_keywords k ())
-  [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match";
-    "with"; "in"; "and"; "to"; "as"; "on"; "return" ]
+let initial_level2_ast_keywords () = Hashtbl.create 23;;
+
+let level2_ast_keywords = ref (initial_level2_ast_keywords ())
+
+let initialize_keywords () =
+  List.iter (fun k -> Hashtbl.add !level2_ast_keywords k ())
+  [ "CProp"; "Prop"; "Type"; "Set"; "let"; "match";
+  "with"; "in"; "and"; "to"; "as"; "on"; "return"; "done" ]
+;;
+
+let _ = initialize_keywords ();;
  
-let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
-let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
+let add_level2_ast_keyword k = Hashtbl.add !level2_ast_keywords k ()
+let remove_level2_ast_keyword k = Hashtbl.remove !level2_ast_keywords k
  
    (* (string, int) Hashtbl.t, with multiple bindings.
     * int is the unicode codepoint *)
  let ligatures = Hashtbl.create 23
+
  let _ =
    List.iter
      (fun (ligature, symbol) -> Hashtbl.add ligatures ligature symbol)
      [ ("->", <:unicode<to>>);   ("=>", <:unicode<Rightarrow>>);
        ("<=", <:unicode<leq>>);  (">=", <:unicode<geq>>);
        ("<>", <:unicode<neq>>);  (":=", <:unicode<def>>);
+      ("==", <:unicode<equiv>>);
      ]
  
-let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ]+
+let regexp uri_step = [ 'a' - 'z' 'A' - 'Z' '0' - '9' '_' '-' ''' ]+
  
  let regexp uri =
    ("cic:/" | "theory:/")              (* schema *)
@@ -142,14 +153,8 @@ let error_at_end lexbuf msg =
    raise (Error (begin_cnum, end_cnum, msg))
  
  let return_with_loc token begin_cnum end_cnum =
-  (* TODO handle line/column numbers *)
-  let flocation_begin =
-    { Lexing.pos_fname = "";
-      Lexing.pos_lnum = -1; Lexing.pos_bol = -1;
-      Lexing.pos_cnum = begin_cnum }
-  in
-  let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in
-  (token, (flocation_begin, flocation_end))
+  let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in
+   token, flocation
  
  let return lexbuf token =
    let begin_cnum, end_cnum = Ulexing.loc lexbuf in
@@ -167,17 +172,17 @@ let mk_lexer token =
  (*     let lexbuf = Ulexing.from_utf8_stream stream in *)
  (** XXX Obj.magic rationale.
   * The problem.
- *  camlp4 constraints the tok_func field of Token.glexer to have type:
+ *  camlp5 constraints the tok_func field of Token.glexer to have type:
   *    Stream.t char -> (Stream.t 'te * flocation_function)
   *  In order to use ulex we have (in theory) to instantiate a new lexbuf each
   *  time a char Stream.t is passed, destroying the previous lexbuf which may
   *  have consumed a character from the old stream which is lost forever :-(
   * The "solution".
- *  Instead of passing to camlp4 a char Stream.t we pass a lexbuf, casting it to
+ *  Instead of passing to camlp5 a char Stream.t we pass a lexbuf, casting it to
   *  char Stream.t with Obj.magic where needed.
   *)
      let lexbuf = Obj.magic stream in
-    Token.make_stream_and_flocation
+    Token.make_stream_and_location
        (fun () ->
          try
            token lexbuf
@@ -228,7 +233,7 @@ let read_unparsed_group token_name lexbuf =
  
  let rec level2_meta_token =
    lexer
-  | xml_blank+ -> level2_meta_token lexbuf
+  | utf8_blank+ -> level2_meta_token lexbuf
    | ident ->
        let s = Ulexing.utf8_lexeme lexbuf in
         begin
@@ -278,7 +283,11 @@ let rec ligatures_token k =
  
  and level2_ast_token =
    lexer
-  | xml_blank+ -> ligatures_token level2_ast_token lexbuf
+  | let_rec -> return lexbuf ("LETREC","")
+  | let_corec -> return lexbuf ("LETCOREC","")
+  | we_proved -> return lexbuf ("WEPROVED","")
+  | we_have -> return lexbuf ("WEHAVE","")
+  | utf8_blank+ -> ligatures_token level2_ast_token lexbuf
    | meta ->
       let s = Ulexing.utf8_lexeme lexbuf in
        return lexbuf ("META", String.sub s 1 (String.length s - 1))
@@ -286,7 +295,7 @@ and level2_ast_token =
    | placeholder -> return lexbuf ("PLACEHOLDER", "")
    | ident ->
        let lexeme = Ulexing.utf8_lexeme lexbuf in
-      if Hashtbl.mem level2_ast_keywords lexeme then
+      if Hashtbl.mem !level2_ast_keywords lexeme then
          return lexbuf ("", lexeme)
        else
          return lexbuf ("IDENT", lexeme)
@@ -319,7 +328,7 @@ and level2_ast_token =
  
  and level1_pattern_token =
    lexer
-  | xml_blank+ -> ligatures_token level1_pattern_token lexbuf
+  | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf
    | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
    | ident ->
        let s = Ulexing.utf8_lexeme lexbuf in
@@ -329,6 +338,11 @@ and level1_pattern_token =
           else
             return lexbuf ("IDENT", s)
         end
+  | color -> return lexbuf ("COLOR", Ulexing.utf8_lexeme lexbuf)
+  | percentage -> 
+      return lexbuf ("PERCENTAGE", Ulexing.utf8_lexeme lexbuf)
+  | floatwithunit -> 
+      return lexbuf ("FLOATWITHUNIT", Ulexing.utf8_lexeme lexbuf)
    | tex_token -> return lexbuf (expand_macro lexbuf)
    | qkeyword ->
        return lexbuf ("QKEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf))
@@ -342,14 +356,40 @@ let level2_ast_token = ligatures_token level2_ast_token
  
  (* API implementation *)
  
-let level1_pattern_lexer = mk_lexer level1_pattern_token
-let level2_ast_lexer = mk_lexer level2_ast_token
-let level2_meta_lexer = mk_lexer level2_meta_token
-
-let lookup_ligatures lexeme =
-  try
-    if lexeme.[0] = '\\'
-    then [ Utf8Macro.expand (String.sub lexeme 1 (String.length lexeme - 1)) ]
-    else List.rev (Hashtbl.find_all ligatures lexeme)
-  with Invalid_argument _ | Utf8Macro.Macro_not_found _ -> []
+let initial_level1_pattern_lexer () = mk_lexer level1_pattern_token
+let initial_level2_ast_lexer () = mk_lexer level2_ast_token
+let initial_level2_meta_lexer () = mk_lexer level2_meta_token
+
+
+let level1_pattern_lexer_ref = ref (initial_level1_pattern_lexer ())
+let level2_ast_lexer_ref = ref (initial_level2_ast_lexer ())
+let level2_meta_lexer_ref = ref (initial_level2_meta_lexer ())
+
+let level1_pattern_lexer () = !level1_pattern_lexer_ref
+let level2_ast_lexer () = !level2_ast_lexer_ref
+let level2_meta_lexer () = !level2_meta_lexer_ref 
+
+let history = ref [];;
+
+let push () =
+  history :=
+    (!level2_ast_keywords,!level1_pattern_lexer_ref,
+     !level2_ast_lexer_ref,!level2_meta_lexer_ref) :: !history;
+  level2_ast_keywords := initial_level2_ast_keywords ();
+  initialize_keywords ();
+  level1_pattern_lexer_ref := initial_level1_pattern_lexer ();
+  level2_ast_lexer_ref := initial_level2_ast_lexer ();
+  level2_meta_lexer_ref := initial_level2_meta_lexer ();
+;;
+
+let pop () =
+  match !history with
+  | [] -> assert false
+  | (kwd,pl,al,ml) :: tl -> 
+      level2_ast_keywords := kwd;
+      level1_pattern_lexer_ref := pl;
+      level2_ast_lexer_ref := al;
+      level2_meta_lexer_ref := ml;
+      history := tl
+;;