let load_notation fname =
let ic = open_in fname in
- let istream = Stream.of_channel ic in
+ let lexbuf = Ulexing.from_utf8_channel ic in
try
while true do
- match GrafiteParser.parse_statement istream with
+ match GrafiteParser.parse_statement lexbuf with
| Executable (_, Command (_, cmd)) -> ignore (process_notation cmd)
| _ -> ()
done
let regexp ident_letter = [ 'a' - 'z' 'A' - 'Z' ]
+let regexp ligature_char = [ "'`~!?@*()[]<>-+=|:;.,/\"" ]
+let regexp ligature = ligature_char ligature_char+
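+(* Examples: "->", "=>" and ":=" all match the ligature regexp above (two or
+ * more characters drawn from ligature_char), while a lone "-" or an
+ * identifier does not. *)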
+
let regexp ident_decoration = '\'' | '!' | '?' | '`'
let regexp ident_cont = ident_letter | xml_digit | '_'
let regexp ident = ident_letter ident_cont* ident_decoration*
let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k ()
let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k
+ (* (string, string) Hashtbl.t, with multiple bindings per key.
+  * The value is the UTF-8 string of the symbol the ligature expands to;
+  * the first binding added for a key is its default expansion. *)
+let ligatures = Hashtbl.create 23
+let _ =
+ List.iter
+ (fun (ligature, symbol) -> Hashtbl.add ligatures ligature symbol)
+ [ ("->", <:unicode<to>>); ("=>", <:unicode<Rightarrow>>);
+ ("<=", <:unicode<leq>>); (">=", <:unicode<geq>>);
+ ("<>", <:unicode<neq>>); (":=", <:unicode<def>>);
+ ]
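+
+(* Sketch of how the table is meant to be queried; lookup_ligatures below is
+ * only a possible implementation of the val left commented out in the .mli.
+ * Hashtbl.find_all returns the most recently added bindings first, so
+ * reversing the list yields registration order, default expansion first:
+ *
+ *   let lookup_ligatures lig = List.rev (Hashtbl.find_all ligatures lig)
+ *
+ * e.g. lookup_ligatures "->" = [ <:unicode<to>> ] *)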
+
let regexp uri =
("cic:/" | "theory:/") (* schema *)
ident ('/' ident)* (* path *)
let return_lexeme lexbuf name = return lexbuf (name, Ulexing.utf8_lexeme lexbuf)
+let return_symbol lexbuf s = return lexbuf ("SYMBOL", s)
+let return_eoi lexbuf = return lexbuf ("EOI", "")
+
let remove_quotes s = String.sub s 1 (String.length s - 2)
-let mk_lexer token =
+let mk_lexer ?(use_ligatures = false) token =
let tok_func stream =
- let lexbuf = Ulexing.from_utf8_stream stream in
+(* let lexbuf = Ulexing.from_utf8_stream stream in *)
+(** XXX Obj.magic rationale.
+ * The problem.
+ *  camlp4 constrains the tok_func field of Token.glexer to have type:
+ *    char Stream.t -> ('te Stream.t * flocation_function)
+ *  To use ulex we would (in theory) have to instantiate a new lexbuf each
+ *  time a char Stream.t is passed, destroying the previous lexbuf, which may
+ *  already have consumed characters from the old stream that would then be
+ *  lost forever :-(
+ * The "solution".
+ *  Instead of passing camlp4 a char Stream.t, we pass a lexbuf, casting it
+ *  to char Stream.t with Obj.magic where needed.
+ *)
+ let lexbuf = Obj.magic stream in
Token.make_stream_and_flocation
(fun () ->
try
let remove_quotes s = String.sub s 1 (String.length s - 2)
let remove_left_quote s = String.sub s 1 (String.length s - 1)
-let rec level2_pattern_token_group counter buffer = lexer
+let rec level2_pattern_token_group counter buffer =
+ lexer
| end_group ->
if (counter > 0) then
Buffer.add_string buffer (Ulexing.utf8_lexeme lexbuf) ;
let end_cnum = level2_pattern_token_group 0 buffer lexbuf in
return_with_loc (token_name, Buffer.contents buffer) begin_cnum end_cnum
-let rec level2_meta_token = lexer
+let rec level2_meta_token =
+ lexer
| xml_blank+ -> level2_meta_token lexbuf
| ident ->
let s = Ulexing.utf8_lexeme lexbuf in
| ast_csymbol ->
return lexbuf ("UNPARSED_AST",
remove_left_quote (Ulexing.utf8_lexeme lexbuf))
- | eof -> return lexbuf ("EOI", "")
+ | eof -> return_eoi lexbuf
-let rec level2_ast_token = lexer
- | xml_blank+ -> level2_ast_token lexbuf
+ (** @param k continuation to be invoked when no ligature has been found *)
+let rec ligatures_token k =
+ lexer
+ | ligature ->
+ let lexeme = Ulexing.utf8_lexeme lexbuf in
+ (match Hashtbl.find_all ligatures lexeme with
+ | [] -> (* ligature not found, rollback and try default lexer *)
+ Ulexing.rollback lexbuf;
+ k lexbuf
+ | ligs -> (* ligatures found, use the default one *)
+ let default_lig = List.hd (List.rev ligs) in
+ return_symbol lexbuf default_lig)
+ | eof -> return_eoi lexbuf
+ | _ -> (* not a ligature, rollback and try default lexer *)
+ Ulexing.rollback lexbuf;
+ k lexbuf
+
+and level2_ast_token =
+ lexer
+ | xml_blank+ -> ligatures_token level2_ast_token lexbuf
| meta -> return lexbuf ("META", Ulexing.utf8_lexeme lexbuf)
| implicit -> return lexbuf ("IMPLICIT", "")
| placeholder -> return lexbuf ("PLACEHOLDER", "")
return lexbuf ("NOTE", comment)
| begincomment -> return lexbuf ("BEGINCOMMENT","")
| endcomment -> return lexbuf ("ENDCOMMENT","")
- | eof -> return lexbuf ("EOI", "")
- | _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)
+ | eof -> return_eoi lexbuf
+ | _ -> return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
-let rec level1_pattern_token = lexer
- | xml_blank+ -> level1_pattern_token lexbuf
+and level1_pattern_token =
+ lexer
+ | xml_blank+ -> ligatures_token level1_pattern_token lexbuf
| number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf)
| ident ->
let s = Ulexing.utf8_lexeme lexbuf in
return lexbuf ("QKEYWORD", remove_quotes (Ulexing.utf8_lexeme lexbuf))
| '(' -> return lexbuf ("LPAREN", "")
| ')' -> return lexbuf ("RPAREN", "")
- | eof -> return lexbuf ("EOI", "")
- | _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)
+ | eof -> return_eoi lexbuf
+ | _ -> return_symbol lexbuf (Ulexing.utf8_lexeme lexbuf)
+
+let level1_pattern_token = ligatures_token level1_pattern_token
+let level2_ast_token = ligatures_token level2_ast_token
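+
+(* Control-flow sketch: the two shadowing definitions above make the exported
+ * lexers try ligatures_token first. Informally:
+ *
+ *   level2_ast_token lexbuf = ligatures_token level2_ast_token lexbuf
+ *     --> ("SYMBOL", default) when the next lexeme is a registered ligature,
+ *     --> rollback, then the wrapped lexer rereads the same input otherwise.
+ *)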
(* API implementation *)
* error message *)
exception Error of int * int * string
+ (** XXX ZACK DEFCON 4 BEGIN: never invoke the tok_func field of the glexers
+  * below with values of type char Stream.t: the arguments must in fact be
+  * Ulexing.lexbuf values cast with Obj.magic :-/ See the comment in the .ml
+  * for the rationale *)
+
val level1_pattern_lexer: (string * string) Token.glexer
val level2_ast_lexer: (string * string) Token.glexer
val level2_meta_lexer: (string * string) Token.glexer
+ (** XXX ZACK DEFCON 4 END *)
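+
+ (* Invocation sketch (ic being some in_channel): feed a Ulexing.lexbuf
+  * disguised as a char Stream.t, e.g.
+  *   fst (level2_ast_lexer.Token.tok_func
+  *          (Obj.magic (Ulexing.from_utf8_channel ic)))
+  * yields the (string * string) token Stream.t. *)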
+
val add_level2_ast_keyword: string -> unit (** non idempotent *)
val remove_level2_ast_keyword: string -> unit (** non idempotent *)
+(* val lookup_ligatures: string -> string list *)
+
function
[] -> Gramext.action (fun (loc: Ast.location) -> action vl loc)
| NoBinding :: tl -> Gramext.action (fun _ -> aux vl tl)
- (* LUCA: DEFCON 4 BEGIN *)
+ (* LUCA: DEFCON 3 BEGIN *)
| Binding (name, Env.TermType) :: tl ->
Gramext.action
(fun (v:Ast.term) ->
aux ((name, (Env.ListType t, Env.ListValue v)) :: vl) tl)
| Env _ :: tl ->
Gramext.action (fun (v:CicNotationEnv.t) -> aux (v @ vl) tl)
- (* LUCA: DEFCON 4 END *)
+ (* LUCA: DEFCON 3 END *)
in
aux [] (List.rev bindings)
level2_meta: [
[ magic = l2_magic -> Ast.Magic magic
| var = l2_variable -> Ast.Variable var
- | blob = UNPARSED_AST -> !parse_level2_ast_ref (Stream.of_string blob)
+ | blob = UNPARSED_AST ->
+ !parse_level2_ast_ref (Ulexing.from_utf8_string blob)
]
];
END
List.map (fun n -> Ast.Ident (n, None)) names, Some ty
| name = IDENT -> [Ast.Ident (name, None)], None
| blob = UNPARSED_META ->
- let meta = !parse_level2_meta_ref (Stream.of_string blob) in
+ let meta = !parse_level2_meta_ref (Ulexing.from_utf8_string blob) in
match meta with
| Ast.Variable (Ast.FreshVar _) -> [meta], None
| Ast.Variable (Ast.TermVar "_") -> [Ast.Ident ("_", None)], None
single_arg: [
[ name = IDENT -> Ast.Ident (name, None)
| blob = UNPARSED_META ->
- let meta = !parse_level2_meta_ref (Stream.of_string blob) in
+ let meta = !parse_level2_meta_ref (Ulexing.from_utf8_string blob) in
match meta with
| Ast.Variable (Ast.FreshVar _)
| Ast.Variable (Ast.IdentVar _) -> meta
| LPAREN; p1 = term; SYMBOL ":"; p2 = term; RPAREN ->
return_term loc (Ast.Cast (p1, p2))
| LPAREN; p = term; RPAREN -> p
- | blob = UNPARSED_META -> !parse_level2_meta_ref (Stream.of_string blob)
+ | blob = UNPARSED_META ->
+ !parse_level2_meta_ref (Ulexing.from_utf8_string blob)
]
];
END
| Stdpp.Exc_located (floc, exn) ->
raise (Parse_error (floc, (Printexc.to_string exn)))
-let parse_level1_pattern stream =
- exc_located_wrapper (fun () -> Grammar.Entry.parse level1_pattern stream)
-let parse_level2_ast stream =
- exc_located_wrapper (fun () -> Grammar.Entry.parse level2_ast stream)
-let parse_level2_meta stream =
- exc_located_wrapper (fun () -> Grammar.Entry.parse level2_meta stream)
+let parse_level1_pattern lexbuf =
+ exc_located_wrapper
+ (fun () -> Grammar.Entry.parse level1_pattern (Obj.magic lexbuf))
+
+let parse_level2_ast lexbuf =
+ exc_located_wrapper
+ (fun () -> Grammar.Entry.parse level2_ast (Obj.magic lexbuf))
+
+let parse_level2_meta lexbuf =
+ exc_located_wrapper
+ (fun () -> Grammar.Entry.parse level2_meta (Obj.magic lexbuf))
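+
+(* Usage sketch: callers now supply a Ulexing.lexbuf rather than a
+ * char Stream.t, as in the grammar above:
+ *   !parse_level2_ast_ref (Ulexing.from_utf8_string blob)
+ * The Obj.magic casts present the lexbuf to Grammar.Entry.parse as the
+ * char Stream.t it expects; the lexers' tok_func casts it back (see the
+ * rationale comment in CicNotationLexer). *)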
let _ =
parse_level1_pattern_ref := parse_level1_pattern;
(** {2 Parsing functions} *)
(** concrete syntax pattern: notation level 1 *)
-val parse_level1_pattern: char Stream.t -> CicNotationPt.term
+val parse_level1_pattern: Ulexing.lexbuf -> CicNotationPt.term
(** AST pattern: notation level 2 *)
-val parse_level2_ast: char Stream.t -> CicNotationPt.term
-val parse_level2_meta: char Stream.t -> CicNotationPt.term
+val parse_level2_ast: Ulexing.lexbuf -> CicNotationPt.term
+val parse_level2_meta: Ulexing.lexbuf -> CicNotationPt.term
(** {2 Grammar extension} *)
val level2_ast_grammar: Grammar.g
val term : CicNotationPt.term Grammar.Entry.e
+
val let_defs :
- (CicNotationPt.capture_variable * CicNotationPt.term * int) list Grammar.Entry.e
+ (CicNotationPt.capture_variable * CicNotationPt.term * int) list
+ Grammar.Entry.e
(** {2 Debugging} *)
p2 =
[ blob = UNPARSED_AST ->
add_raw_attribute ~text:(sprintf "@{%s}" blob)
- (CicNotationParser.parse_level2_ast (Stream.of_string blob))
+ (CicNotationParser.parse_level2_ast
+ (Ulexing.from_utf8_string blob))
| blob = UNPARSED_META ->
add_raw_attribute ~text:(sprintf "${%s}" blob)
- (CicNotationParser.parse_level2_meta (Stream.of_string blob))
+ (CicNotationParser.parse_level2_meta
+ (Ulexing.from_utf8_string blob))
] ->
let assoc =
match assoc with
in
let p1 =
add_raw_attribute ~text:s
- (CicNotationParser.parse_level1_pattern (Stream.of_string s))
+ (CicNotationParser.parse_level1_pattern
+ (Ulexing.from_utf8_string s))
in
(dir, p1, assoc, prec, p2)
]
| Stdpp.Exc_located (floc, exn) ->
raise (CicNotationParser.Parse_error (floc, (Printexc.to_string exn)))
-let parse_statement stream =
- exc_located_wrapper (fun () -> (Grammar.Entry.parse statement stream))
+let parse_statement lexbuf =
+ exc_located_wrapper
+ (fun () -> (Grammar.Entry.parse statement (Obj.magic lexbuf)))
-let parse_dependencies stream =
- let tok_stream,_ = CicNotationLexer.level2_ast_lexer.Token.tok_func stream in
+let parse_dependencies lexbuf =
+ let tok_stream,_ =
+ CicNotationLexer.level2_ast_lexer.Token.tok_func (Obj.magic lexbuf)
+ in
let rec parse acc =
(parser
| [< '("URI", u) >] ->
(** @raise End_of_file *)
val parse_statement:
- char Stream.t ->
+ Ulexing.lexbuf ->
(CicNotationPt.term, CicNotationPt.term, GrafiteAst.reduction,
GrafiteAst.obj, string)
GrafiteAst.statement
(** @raise End_of_file *)
-val parse_dependencies: char Stream.t -> GrafiteAst.dependency list
+val parse_dependencies: Ulexing.lexbuf -> GrafiteAst.dependency list
ic := open_in fname
in
Arg.parse [] open_file usage;
- let deps = GrafiteParser.parse_dependencies (Stream.of_channel !ic) in
+ let deps =
+ GrafiteParser.parse_dependencies (Ulexing.from_utf8_channel !ic)
+ in
List.iter (fun dep -> print_endline (GrafiteAstPp.pp_dependency dep)) deps
+
prerr_endline (Printf.sprintf "Unsupported level %s" l);
exit 2
in
- let token_stream = fst (lexer.Token.tok_func (Stream.of_channel !ic)) in
+ let token_stream =
+ fst (lexer.Token.tok_func (Obj.magic (Ulexing.from_utf8_channel !ic)))
+ in
Printf.printf "Lexing notation level %s\n" !level; flush stdout;
let rec dump () =
let (a,b) = Stream.next token_stream in
CicNotation.load_notation (Helm_registry.get "notation.core_file");
print_endline "done.";
flush stdout;
- process_stream (Stream.of_channel stdin)
+ process_stream (Ulexing.from_utf8_channel stdin)