-cicNotationLexer.cmo: cicNotationLexer.cmi
-cicNotationLexer.cmx: cicNotationLexer.cmi
-cicNotationParser.cmo: cicNotationLexer.cmi cicNotationParser.cmi
-cicNotationParser.cmx: cicNotationLexer.cmx cicNotationParser.cmi
REQUIRES = \
helm-utf8_macros \
ulex
-NOTATIONS =
NULL =
INTERFACE_FILES = \
cicNotationLexer.mli \
$(NULL)
IMPLEMENTATION_FILES = \
$(patsubst %.mli, %.ml, $(INTERFACE_FILES)) \
- $(patsubst %,%_notation.ml,$(NOTATIONS)) \
$(NULL)
all:
-cicNotationLexer.cmo: cicNotationLexer.ml
+cicNotationLexer.cmo: cicNotationLexer.ml cicNotationLexer.cmi
$(OCAMLC_P4) -c $<
-cicNotationLexer.cmx: cicNotationLexer.ml
+cicNotationLexer.cmx: cicNotationLexer.ml cicNotationLexer.cmi
$(OCAMLOPT_P4) -c $<
-cicNotationParser.cmo: cicNotationParser.ml
+cicNotationParser.cmo: REQUIRES = helm-utf8_macros camlp4.gramlib
+cicNotationParser.cmo: cicNotationParser.ml cicNotationParser.cmi cicNotationLexer.cmi
$(OCAMLC_P4) -c $<
-cicNotationParser.cmx: cicNotationParser.ml
- $(OCAMLOPT_P4) -c $<
-
-%_notation.cmo: %_notation.ml
- $(OCAMLC_P4) -c $<
-%_notation.cmx: %_notation.ml
+cicNotationParser.cmx: REQUIRES = helm-utf8_macros camlp4.gramlib
+cicNotationParser.cmx: cicNotationParser.ml cicNotationParser.cmi cicNotationLexer.cmi
$(OCAMLOPT_P4) -c $<
LOCAL_LINKOPTS = -package helm-cic_notation -linkpkg
* http://helm.cs.unibo.it/
*)
-exception Error of int * int * string
+open Printf
-exception Not_an_extended_ident
+exception Error of int * int * string
let regexp number = xml_digit+
let regexp meta = implicit number
let error lexbuf msg =
- raise (Error (Ulexing.lexeme_start lexbuf, Ulexing.lexeme_end lexbuf, msg))
+ let begin_cnum, end_cnum = Ulexing.loc lexbuf in
+ raise (Error (begin_cnum, end_cnum, msg))
let error_at_end lexbuf msg =
- raise (Error (Ulexing.lexeme_end lexbuf, Ulexing.lexeme_end lexbuf, msg))
+ let begin_cnum, end_cnum = Ulexing.loc lexbuf in
+ raise (Error (begin_cnum, end_cnum, msg))
let return lexbuf token =
+ let begin_cnum, end_cnum = Ulexing.loc lexbuf in
+ (* TODO handle line/column numbers *)
let flocation_begin =
- { Lexing.pos_fname = ""; Lexing.pos_lnum = -1; Lexing.pos_bol = -1;
- Lexing.pos_cnum = Ulexing.lexeme_start lexbuf }
- in
- let flocation_end =
- { flocation_begin with Lexing.pos_cnum = Ulexing.lexeme_end lexbuf }
+ { Lexing.pos_fname = "";
+ Lexing.pos_lnum = -1; Lexing.pos_bol = -1;
+ Lexing.pos_cnum = begin_cnum }
in
+ let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in
(token, (flocation_begin, flocation_end))
let remove_quotes s = String.sub s 1 (String.length s - 2)
-let tok_func token stream =
- let lexbuf = Ulexing.from_utf8_stream stream in
- Token.make_stream_and_flocation
- (fun () ->
- try
- token lexbuf
- with
- | Ulexing.Error -> error_at_end lexbuf "Unexpected character"
- | Ulexing.InvalidCodepoint i -> error_at_end lexbuf "Invalid code point")
-
-let mk_lexer token = {
- Token.tok_func = token;
- Token.tok_using = (fun _ -> ());
- Token.tok_removing = (fun _ -> ());
- Token.tok_match = Token.default_match;
- Token.tok_text = Token.lexer_text;
- Token.tok_comm = None;
-}
+let mk_lexer token =
+ let tok_func stream =
+ let lexbuf = Ulexing.from_utf8_stream stream in
+ Token.make_stream_and_flocation
+ (fun () ->
+ try
+ token lexbuf
+ with
+ | Ulexing.Error -> error_at_end lexbuf "Unexpected character"
+ | Ulexing.InvalidCodepoint p ->
+ error_at_end lexbuf (sprintf "Invalid code point: %d" p))
+ in
+ {
+ Token.tok_func = tok_func;
+ Token.tok_using = (fun _ -> ());
+ Token.tok_removing = (fun _ -> ());
+ Token.tok_match = Token.default_match;
+ Token.tok_text = Token.lexer_text;
+ Token.tok_comm = None;
+ }
let expand_macro lexbuf =
let macro =
| eof -> return lexbuf ("EOI", "")
| _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)
-let level1_tok_func stream = tok_func level1_token stream
-let level1_lexer = mk_lexer level1_tok_func
-
(** {2 Level 2 lexer} *)
let rec level2_token = lexer
| eof -> return lexbuf ("EOI", "")
| _ -> return lexbuf ("SYMBOL", Ulexing.utf8_lexeme lexbuf)
-let level2_tok_func stream = tok_func level2_token stream
-let level2_lexer = mk_lexer level2_tok_func
+(* API implementation *)
+
+let syntax_pattern_lexer = mk_lexer level1_token
+let ast_pattern_lexer = mk_lexer level2_token
* http://helm.cs.unibo.it/
*)
+ (** begin of error offset (counted in unicode codepoint)
+ * end of error offset (counted as above)
+ * error message *)
exception Error of int * int * string
(** lexer for concrete syntax patterns (notation level 1) *)
-val level1_lexer: (string * string) Token.glexer
+val syntax_pattern_lexer: (string * string) Token.glexer
(** lexer for ast patterns (notation level 2) *)
-val level2_lexer: (string * string) Token.glexer
+val ast_pattern_lexer: (string * string) Token.glexer
exception Parse_error of Token.flocation * string
-let grammar = Grammar.gcreate CicNotationLexer.level1_lexer
-
-let level1 = Grammar.Entry.create grammar "level1"
+module Level1Lexer =
+struct
+ type te = string * string
+ let lexer = CicNotationLexer.syntax_pattern_lexer
+end
+module Level1Parser = Grammar.GMake (Level1Lexer)
+
+module Level2Lexer =
+struct
+ type te = string * string
+ let lexer = CicNotationLexer.ast_pattern_lexer
+end
+module Level2Parser = Grammar.GMake (Level2Lexer)
+
+let level1_pattern = Level1Parser.Entry.create "level1_pattern"
+let level2_pattern = Level2Parser.Entry.create "level2_pattern"
let return_term loc term = ()
with Failure _ ->
failwith (sprintf "Lexer failure: string_of_int \"%s\" failed" s)
-EXTEND
- GLOBAL: level1;
-
- level1: [ [ p = pattern -> () ] ];
+(* {{{ Grammar for concrete syntax patterns, notation level 1 *)
+GEXTEND Level1Parser
+ GLOBAL: level1_pattern;
+ level1_pattern: [ [ p = pattern -> () ] ];
pattern: [ [ p = LIST1 simple_pattern -> () ] ];
]
];
- layout_schemata: [
- [ SYMBOL "\\ARRAY"; p = simple_pattern; fsep = OPT field_sep;
- rsep = OPT row_sep ->
- ()
- | SYMBOL "\\FRAC"; p1 = simple_pattern; p2 = simple_pattern -> ()
- | SYMBOL "\\SQRT"; p = simple_pattern -> ()
- | SYMBOL "\\ROOT"; p1 = simple_pattern; SYMBOL "\\OF";
- p2 = simple_pattern ->
- ()
- (* TODO XXX many issues here:
- * - "^^" is lexed as two "^" symbols
- * - "a_b" is lexed as IDENT "a_b" *)
- | p1 = simple_pattern; SYMBOL "^"; p2 = simple_pattern -> ()
- | p1 = simple_pattern; SYMBOL "^"; SYMBOL "^"; p2 = simple_pattern -> ()
- | p1 = simple_pattern; SYMBOL "_"; p2 = simple_pattern -> ()
- | p1 = simple_pattern; SYMBOL "_"; SYMBOL "_"; p2 = simple_pattern -> ()
- ]
- ];
+ simple_pattern:
+ [ "infix" LEFTA
+ (* TODO XXX many issues here:
+ * - "^^" is lexed as two "^" symbols
+ * - "a_b" is lexed as IDENT "a_b" *)
+ [ p1 = SELF; SYMBOL "^"; p2 = SELF -> ()
+ | p1 = SELF; SYMBOL "^"; SYMBOL "^"; p2 = SELF -> ()
+ | p1 = SELF; SYMBOL "_"; p2 = SELF -> ()
+ | p1 = SELF; SYMBOL "_"; SYMBOL "_"; p2 = SELF -> ()
+ ]
+ | "simple" NONA
+ [ SYMBOL "\\LIST0"; p = SELF; sep = OPT sep -> ()
+ | SYMBOL "\\LIST1"; p = SELF; sep = OPT sep -> ()
+ | b = box_token -> ()
+ | id = IDENT -> ()
+ | SYMBOL "\\NUM"; id = IDENT -> ()
+ | SYMBOL "\\IDENT"; id = IDENT -> ()
+ | SYMBOL "\\OPT"; p = SELF -> ()
+ | SYMBOL "\\ARRAY"; p = SELF; fsep = OPT field_sep; rsep = OPT row_sep ->
+ ()
+ | SYMBOL "\\FRAC"; p1 = SELF; p2 = SELF -> ()
+ | SYMBOL "\\SQRT"; p = SELF -> ()
+ | SYMBOL "\\ROOT"; p1 = SELF; SYMBOL "\\OF"; p2 = SELF ->
+ ()
+ | SYMBOL "["; p = pattern; SYMBOL "]" -> ()
+ ]
+ ];
+END
+(* }}} *)
- simple_pattern: [
- [ SYMBOL "\\LIST0"; p = simple_pattern; sep = OPT sep -> ()
- | SYMBOL "\\LIST1"; p = simple_pattern; sep = OPT sep -> ()
- | b = box_token -> ()
- | id = IDENT -> ()
- | SYMBOL "\\NUM"; id = IDENT -> ()
- | SYMBOL "\\IDENT"; id = IDENT -> ()
- | SYMBOL "\\OPT"; p = simple_pattern -> ()
- | l = layout_schemata -> ()
- | SYMBOL "["; p = pattern; SYMBOL "]" -> ()
- ]
- ];
+(* {{{ Grammar for ast patterns, notation level 2 *)
+GEXTEND Level2Parser
+ GLOBAL: level2_pattern;
+ level2_pattern: [ [ p = pattern -> () ] ];
+
+ pattern: [ [ a = ast -> () ] ];
+
+ ast: [ [ SYMBOL "\\FOO" -> () ] ];
END
+(* }}} *)
let exc_located_wrapper f =
try
| Stdpp.Exc_located (floc, exn) ->
raise (Parse_error (floc, (Printexc.to_string exn)))
-let parse_level1_pattern stream =
- exc_located_wrapper (fun () -> (Grammar.Entry.parse level1 stream))
+let parse_syntax_pattern stream =
+ exc_located_wrapper
+ (fun () ->
+ (Level1Parser.Entry.parse level1_pattern (Level1Parser.parsable stream)))
+
+let parse_ast_pattern stream =
+ exc_located_wrapper
+ (fun () ->
+ (Level2Parser.Entry.parse level2_pattern (Level2Parser.parsable stream)))
-(* vim:set encoding=utf8: *)
+(* vim:set encoding=utf8 foldmethod=marker: *)
(** {2 Parsing functions} *)
-val parse_level1_pattern: char Stream.t -> unit
-(*val parse_level2_pattern: char Stream.t -> unit*)
+ (** concrete syntax pattern: notation level 1 *)
+val parse_syntax_pattern: char Stream.t -> unit
-(** {2 Grammar extensions} *)
-
-(*val term: CicAst.term Grammar.Entry.e |+* recursive rule +|*)
-(*val term0: CicAst.term Grammar.Entry.e |+* top level rule +|*)
-
-(*val return_term: CicAst.location -> CicAst.term -> CicAst.term*)
-
- (** raise a parse error *)
-(*val fail: CicAst.location -> string -> 'a*)
+ (** AST pattern: notation level 2 *)
+val parse_ast_pattern: char Stream.t -> unit
* http://helm.cs.unibo.it/
*)
-let ic =
+let _ =
+ let level = ref ~-1 in
+ let ic = ref stdin in
+ let arg_spec = [ "-level", Arg.Set_int level, "set the notation level" ] in
+ let usage = "test_lexer -level { 1 | 2 | 3 } [ file ]" in
+ let open_file fname =
+ if !ic <> stdin then close_in !ic;
+ ic := open_in fname
+ in
+ Arg.parse arg_spec open_file usage;
+ let lexer =
+ match !level with
+ | 1 -> CicNotationLexer.syntax_pattern_lexer
+ | 2 -> CicNotationLexer.ast_pattern_lexer
+ | _ -> Arg.usage arg_spec usage; exit 1
+ in
+ let token_stream = fst (lexer.Token.tok_func (Stream.of_channel !ic)) in
+ Printf.printf "Lexing notation level %d\n" !level; flush stdout;
+ let rec dump () =
+ let (a,b) = Stream.next token_stream in
+ if a = "EOI" then raise Stream.Failure;
+ print_endline (Printf.sprintf "%s '%s'" a b);
+ dump ()
+ in
try
- open_in Sys.argv.(1)
- with Invalid_argument _ -> stdin
-in
-let token_stream =
- fst (CicNotationLexer.level1_lexer.Token.tok_func (Stream.of_channel ic))
-in
-let rec dump () =
- let (a,b) = Stream.next token_stream in
- if a = "EOI" then raise Stream.Failure;
- print_endline (Printf.sprintf "%s '%s'" a b);
- dump ()
-in
-try
- dump ()
-with Stream.Failure -> ()
+ dump ()
+ with Stream.Failure -> ()
+
open Printf
+(* cut and past from CicAst.loc_of_floc *)
+let loc_of_floc = function
+ | { Lexing.pos_cnum = loc_begin }, { Lexing.pos_cnum = loc_end } ->
+ (loc_begin, loc_end)
+
let _ =
- let ic = stdin in
- try
- while true do
- let line = input_line ic in
- let istream = Stream.of_string line in
- try
- let _ = CicNotationParser.parse_level1_pattern istream in
- print_endline "ok"
- with
- | CicNotationParser.Parse_error (floc, msg) ->
- eprintf "parse error: %s\n" msg; flush stderr
-(* let (x, y) = CicAst.loc_of_floc floc in*)
-(* let before = String.sub line 0 x in*)
-(* let error = String.sub line x (y - x) in*)
-(* let after = String.sub line y (String.length line - y) in*)
-(* eprintf "%s\e[01;31m%s\e[00m%s\n" before error after;*)
-(* prerr_endline (sprintf "at character %d-%d: %s" x y msg)*)
- done
- with End_of_file ->
- close_in ic
+ let level = ref ~-1 in
+ let arg_spec = [ "-level", Arg.Set_int level, "set the notation level" ] in
+ let usage = "test_parser -level { 1 | 2 | 3 }" in
+ Arg.parse arg_spec (fun _ -> raise (Arg.Bad usage)) usage;
+ let parse_fun =
+ match !level with
+ | 1 -> CicNotationParser.parse_syntax_pattern
+ | 2 -> CicNotationParser.parse_ast_pattern
+ | _ -> Arg.usage arg_spec usage; exit 1
+ in
+ let ic = stdin in
+ try
+ printf "Parsing notation level %d\n" !level; flush stdout;
+ while true do
+ let line = input_line ic in
+ let istream = Stream.of_string line in
+ try
+ let _ = parse_fun istream in
+ print_endline "ok"
+ with CicNotationParser.Parse_error (floc, msg) ->
+ let (x, y) = loc_of_floc floc in
+ let before = String.sub line 0 x in
+ let error = String.sub line x (y - x) in
+ let after = String.sub line y (String.length line - y) in
+ eprintf "%s\e[01;31m%s\e[00m%s\n" before error after;
+ prerr_endline (sprintf "at character %d-%d: %s" x y msg)
+ done
+ with End_of_file ->
+ close_in ic