--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Parser
+open Ast
+
+(* Overall scheme:
+ *
+ * The rules are translated to:
+ *
+ * let rec parse_<rule1> ... = ...
+ * and parse_<rule2> ... = ...
+ * and ...
+ * and parse_<ruleN> ... = ...
+ * in
+ *
+ * Every rule has at least two arguments: 'current' and 'get_next'.
+ * 'current()' is the token that should match the first symbol of the
+ * rule. 'get_next()' returns the next token.
+ *
+ * The rules may have further user arguments; these are the next arguments
+ * in turn.
+ *
+ * The rules return the user value. After they have returned to the caller
+ * the current token is the token that follows the sequence of tokens
+ * matching the rule.
+ *
+ * The rules will raise:
+ * - Not_found if the first token does not match
+ * - Parsing.Parse_error if the rest does not match.
+ *
+ * Rule scheme:
+ *
+ * rule(arg1,arg2,...):
+ * (l1:x1)
+ * {{ let-CODE }}
+ * (l2:y2(name1,...)) y3 ...
+ * {{ CODE }}
+ * ? {{ ?-CODE }}
+ * | x2 ...
+ * | ...
+ * | xN
+ *
+ * let parse_<rule> current get_next arg1 arg2 ... =
+ * match current() with
+ * S(x1) -> ...
+ * | S(x2) -> ...
+ * | ...
+ * | S(xN) -> ...
+* | _ -> raise Not_found
+ *
+ * Here, S(xi) denotes the set of tokens matched by xi without all tokens
+ * already matched by x1 to x(i-1). (If S(xi) = empty, a warning is printed,
+ * and this branch of the rule is omitted.)
+ *
+ * S(xi) may be a set because xi may be a reference to another rule. In this
+ * case, S(xi) bases on the set of tokens that match the first symbol of
+ * the other rule. (In general, S(xi) must be computed recursively.)
+ *
+ * If the "?" clause is present, every branch is embraced by the following:
+ *
+ * let position = ref "<Label of x1>" in
+ * ( try ...
+ * with Parsing.Parse_error -> ( <<?-CODE>> )
+ * )
+ *
+ * Next: The "..." is
+ *
+ * OPTIONAL: let <l1> = parse_<rule(x1)> in
+ * <<let-CODE>>
+ * M(y1)
+ * M(y2)
+ * ...
+ * M(yN)
+ * <<CODE>>
+ *
+ * If x1 is a rule invocation, it is now parsed, and the result is bound
+ * to a variable.
+ *
+ * Note: After x1 has matched, the Caml variable <l1> must be either
+ * bound to the result of the sub parsing, or to the value associated
+ * with the token (if any). The latter is already done in the main
+ * "match" statement, i.e. "match ... with S(x1) -> ..." is actually
+ * "match ... with Token1 <l1> -> ...".
+ *
+ * Note: After calling parse_<rule(x1)> the exception Not_found is NEVER
+ * converted to Parsing.Parse_error. It is simply not possible that this
+ * happens.
+
+ * For every remaining symbol yi of the rule, a matching statement M(yi)
+ * is produced. These statements have the form:
+ *
+ * OPTIONAL: position := "<Label of yi>";
+ * CASE: yi is a token without associated value
+ * let yy_i = get_next() OR current() in
+ * if yy_i <> Token(yi) then raise Parsing.Parse_error;
+ * CASE: yi is a token with value
+ * let yy_i = get_next() OR current() in
+ * let <li> = match yy_i with Token x -> x | _ -> raise Parsing.Parse_error
+ * in
+ * CASE: yi is a rule invocation
+ * OPTIONAL: let _ = get_next() in
+ * let <li> = try parse_<rule(yi)>
+ * with Not_found -> raise Parsing.Parse_error in
+ *
+ * yy_i is get_next() if y(i-1) was a token, and yy_i is current() if
+ * y(i-1) was a rule invocation.
+ *
+ * Repetitions:
+ *
+ * If yi = (yi')*:
+ *
+ * CASE no label given:
+ *
+ * ( try
+ * while true do
+ * M(yi') with the modification that top-level mismatches raise
+ * Not_found instead of Parsing.Parse_error
+ * done
+ * with Not_found -> ()
+ * )
+ *
+ * CASE a label <li> is given: The list of results must be bound to <li>!
+ *
+ * let yy_list = ref [] in
+ * ( try
+ * while true do
+ * let yy_first = M(yi') (with some modifications) in
+ * yy_list := yy_first :: !yy_list;
+ * done
+ * with Not_found -> ()
+ * );
+ * let <li> = List.rev !yy_list in
+ *
+ * Note that this scheme minimizes stack and heap allocations.
+ *
+ * Options:
+ *
+ * If yi = (yi')?:
+ *
+ * CASE no label given:
+ *
+ * ( try
+ * M(yi') with the modification that top-level mismatches raise
+ * Not_found instead of Parsing.Parse_error
+ * with Not_found -> ()
+ * )
+ *
+ * CASE a label <li> is given: The optional result must be bound to <li>!
+ *
+ * let <li> =
+ * try
+ * Some( M(yi') (with some modifications) )
+ * with Not_found -> None
+ * );
+ *)
+
+
+let lookup_rule tree name =
+ try
+ List.find (fun r -> r.rule_name = name) tree.text_rules
+ with
+ Not_found ->
+ failwith ("Rule `" ^ name ^ "' not found")
+;;
+
+
+let is_typed tree name =
+ (* Find out whether the token 'name' is typed or not *)
+ let decl =
+ try
+ List.find (fun d -> match d with
+ D_token n -> n = name
+ | D_typed_token n -> n = name
+ )
+ tree.text_decls
+ with
+ Not_found ->
+ failwith ("Token `" ^ name ^ "' not found")
+ in
+ match decl with
+ D_token _ -> false
+ | D_typed_token _ -> true
+;;
+
+
+let label_of_symbol tree sym =
+ match sym with
+ U_symbol (tok, lab) ->
+ (* if is_typed tree tok then lab else None *)
+ lab
+ | L_symbol (_, _, lab) -> lab
+ | L_indirect (_, _, lab) -> lab
+;;
+
+
+let is_untyped_U_symbol tree sym =
+ match sym with
+ U_symbol (tok, _) ->
+ not(is_typed tree tok)
+ | L_symbol (_, _, _) -> false
+ | L_indirect (_, _, _) -> false
+;;
+
+
+
+let rec set_of_list l =
+ (* Removes duplicate members of l *)
+ match l with
+ [] -> []
+ | x :: l' -> if List.mem x l' then set_of_list l' else x :: (set_of_list l')
+;;
+
+
+let selector_set_of_rule tree name =
+ (* Determines the set of tokens that match the first symbol of a rule *)
+
+ let rec collect visited_rules name =
+ if List.mem name visited_rules then
+ []
+ else
+ let r = lookup_rule tree name in
+ List.flatten
+ (List.map
+ (fun branch ->
+ match branch.branch_selector with
+ U_symbol (tok_name,_) ->
+ [ tok_name ]
+ | L_symbol (rule_name, _, _) ->
+ collect (name :: visited_rules) rule_name
+ | L_indirect (_, _, _) ->
+ failwith("The first symbol in rule `" ^ name ^
+ "' is an indirect call; this is not allowed")
+ )
+ r.rule_branches
+ )
+ in
+ set_of_list (collect [] name)
+;;
+
+
+let output_code_location b file_name (_, line, column) =
+ Buffer.add_string b "\n";
+ Buffer.add_string b ("# " ^ string_of_int line ^ " \"" ^
+ file_name ^ "\"\n");
+ Buffer.add_string b (String.make column ' ')
+;;
+
+
+let phantasy_line = ref 100000;;
+
+let output_code b file_name ((code, line, column) as triple) =
+ if code <> "" then begin
+ output_code_location b file_name triple;
+ Buffer.add_string b code;
+ Buffer.add_string b ("\n# " ^ string_of_int !phantasy_line ^ " \"<Generated Code>\"\n");
+ phantasy_line := !phantasy_line + 10000;
+ end
+;;
+
+
+let process_branch b file_name tree branch =
+
+ let make_rule_invocation called_rule args lab allow_not_found =
+ (* Produces: let <label> = parse_<called_rule> ... args in
+ * If not allow_not_found, the exception Not_found is caught and
+ * changed into Parsing.Parse_error.
+ *)
+ let r = lookup_rule tree called_rule in
+ if List.length r.rule_arguments <> List.length args then
+ failwith("Calling rule `" ^ called_rule ^ "' with the wrong number of arguments!");
+
+ Buffer.add_string b "let ";
+ begin match lab with
+ None -> Buffer.add_string b "_"
+ | Some l -> Buffer.add_string b l
+ end;
+ Buffer.add_string b " = ";
+ if not allow_not_found then
+ Buffer.add_string b "try ";
+ Buffer.add_string b "parse_";
+ Buffer.add_string b called_rule;
+ Buffer.add_string b " yy_current yy_get_next";
+ List.iter
+ (fun a -> Buffer.add_string b " ";
+ Buffer.add_string b a;
+ )
+ args;
+ if not allow_not_found then
+ Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
+ Buffer.add_string b " in\n"
+ in
+
+ let make_indirect_rule_invocation ml_name args lab allow_not_found =
+ (* Produces: let <label> = ml_name ... args in
+ * If not allow_not_found, the exception Not_found is caught and
+ * changed into Parsing.Parse_error.
+ *)
+ Buffer.add_string b "let ";
+ begin match lab with
+ None -> Buffer.add_string b "_"
+ | Some l -> Buffer.add_string b l
+ end;
+ Buffer.add_string b " = ";
+ if not allow_not_found then
+ Buffer.add_string b "try ";
+ Buffer.add_string b ml_name;
+ Buffer.add_string b " yy_current yy_get_next";
+ List.iter
+ (fun a -> Buffer.add_string b " ";
+ Buffer.add_string b a;
+ )
+ args;
+ if not allow_not_found then
+ Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
+ Buffer.add_string b " in\n"
+ in
+
+ let process_symbol sym previous_was_token allow_not_found =
+ match sym with
+ U_symbol(tok, lab) ->
+ (* Distinguish between simple tokens and typed tokens *)
+ if is_typed tree tok then begin
+ (* Typed token *)
+ Buffer.add_string b "let ";
+ begin match lab with
+ None -> Buffer.add_string b "_"
+ | Some l -> Buffer.add_string b l
+ end;
+ Buffer.add_string b " = match ";
+ if previous_was_token then
+ Buffer.add_string b "yy_get_next()"
+ else
+ Buffer.add_string b "yy_current()";
+ Buffer.add_string b " with ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " x -> x | _ -> raise ";
+ if allow_not_found then
+ Buffer.add_string b "Not_found"
+ else
+ Buffer.add_string b "Parsing.Parse_error";
+ Buffer.add_string b " in\n";
+ end
+ else begin
+ (* Simple token *)
+ Buffer.add_string b "if (";
+ if previous_was_token then
+ Buffer.add_string b "yy_get_next()"
+ else
+ Buffer.add_string b "yy_current()";
+ Buffer.add_string b ") <> ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " then raise ";
+ if allow_not_found then
+ Buffer.add_string b "Not_found;\n"
+ else
+ Buffer.add_string b "Parsing.Parse_error;\n"
+ end
+ | L_symbol(called_rule, args, lab) ->
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ make_rule_invocation called_rule args lab allow_not_found
+ | L_indirect(ml_name, args, lab) ->
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ make_indirect_rule_invocation ml_name args lab allow_not_found
+ in
+
+ let process_pattern (current_position, previous_was_token) pat =
+ (* Assign "position" if necessary. *)
+ let new_position =
+ if branch.branch_error_code <> None then begin
+ match pat.pat_symbol with
+ U_symbol(_,Some l) -> l
+ | L_symbol(_,_,Some l) -> l
+ | L_indirect(_,_,Some l) -> l
+ | _ -> ""
+ end
+ else ""
+ in
+ if new_position <> current_position then begin
+ Buffer.add_string b "yy_position := \"";
+ Buffer.add_string b new_position;
+ Buffer.add_string b "\";\n";
+ end;
+
+ let this_is_token =
+ match pat.pat_symbol with
+ U_symbol(_,_) -> pat.pat_modifier = Exact
+ | L_symbol(_,_,_) -> false
+ | L_indirect(_,_,_) -> false
+ in
+
+ (* First distinguish between Exact, Option, and Repetition: *)
+ begin match pat.pat_modifier with
+ Exact ->
+ process_symbol pat.pat_symbol previous_was_token false
+ | Option ->
+ begin match label_of_symbol tree pat.pat_symbol with
+ None ->
+ (* CASE: optional symbol without label *)
+ (* OPTIMIZATION: If the symbol is
+ * a token, the loop becomes very simple.
+ *)
+ if (match pat.pat_symbol with
+ U_symbol(t,_) -> not (is_typed tree t) | _ -> false)
+ then begin
+ let tok = match pat.pat_symbol with
+ U_symbol(t,_) -> t | _ -> assert false in
+ (* Optimized case *)
+ Buffer.add_string b "if ";
+ if previous_was_token then
+ Buffer.add_string b "yy_get_next()"
+ else
+ Buffer.add_string b "yy_current()";
+ Buffer.add_string b " = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " then ignore(yy_get_next());\n";
+ end
+ else begin
+ (* General, non-optimized case: *)
+ Buffer.add_string b "( try (";
+ process_symbol pat.pat_symbol previous_was_token true;
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b ") with Not_found -> ());\n";
+ end
+ | Some l ->
+ (* CASE: optional symbol with label *)
+ if is_untyped_U_symbol tree pat.pat_symbol then begin
+ (* SUBCASE: The label becomes a boolean variable *)
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = try (";
+ process_symbol pat.pat_symbol previous_was_token true;
+ Buffer.add_string b ");\n";
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "true with Not_found -> false in\n";
+ end
+ else begin
+ (* SUBCASE: the symbol has a value *)
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = try let yy_tok = Some(";
+ process_symbol pat.pat_symbol previous_was_token true;
+ Buffer.add_string b l;
+ Buffer.add_string b ") in\n";
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+
+ Buffer.add_string b "yy_tok with Not_found -> None in\n";
+ end
+ end
+ | Repetition ->
+ begin match label_of_symbol tree pat.pat_symbol with
+ None ->
+ (* CASE: repeated symbol without label *)
+ (* OPTIMIZATION: If the symbol is
+ * a token, the loop becomes very simple.
+ *)
+ if (match pat.pat_symbol with
+ U_symbol(t,_) -> not (is_typed tree t) | _ -> false)
+ then begin
+ let tok = match pat.pat_symbol with
+ U_symbol(t,_) -> t | _ -> assert false in
+ if previous_was_token then begin
+ (* Optimized case I *)
+ Buffer.add_string b "while yy_get_next() = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " do () done;\n";
+ end
+ else begin
+ (* Optimized case II *)
+ Buffer.add_string b "if yy_current() = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " then (";
+ Buffer.add_string b "while yy_get_next() = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " do () done);\n";
+ end
+ end
+ else begin
+ (* General, non-optimized case: *)
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "( try while true do (";
+ process_symbol pat.pat_symbol false true;
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n"
+ else
+ Buffer.add_string b "();\n";
+
+ Buffer.add_string b ") done with Not_found -> ());\n";
+ end
+ | Some l ->
+ (* CASE: repeated symbol with label *)
+ if is_untyped_U_symbol tree pat.pat_symbol then begin
+ (* SUBCASE: The label becomes an integer variable *)
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "let yy_counter = ref 0 in\n";
+ Buffer.add_string b "( try while true do \n";
+ process_symbol pat.pat_symbol false true;
+ Buffer.add_string b "incr yy_counter;\n";
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+
+ Buffer.add_string b "done with Not_found -> ());\n";
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = !yy_counter in\n";
+ end
+ else begin
+ (* SUBCASE: the symbol has a value *)
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "let yy_list = ref [] in\n";
+ Buffer.add_string b "( try while true do \n";
+ process_symbol pat.pat_symbol false true;
+ Buffer.add_string b "yy_list := ";
+ Buffer.add_string b l;
+ Buffer.add_string b " :: !yy_list;\n";
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+
+ Buffer.add_string b "done with Not_found -> ());\n";
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = List.rev !yy_list in\n";
+ end
+ end
+ end;
+
+ (* Continue: *)
+ (new_position, this_is_token)
+ in
+
+
+ let process_inner_branch current_position =
+ (* If there is "early code", run this now: *)
+ output_code b file_name branch.branch_early_code;
+ Buffer.add_string b "\n";
+
+ (* If the first symbol is a rule invocation, call the corresponding
+ * parser function now.
+ *)
+ let previous_was_token =
+ begin match branch.branch_selector with
+ U_symbol(_,_) ->
+ true
+ | L_symbol(called_rule, args, lab) ->
+ make_rule_invocation called_rule args lab true;
+ false
+ | L_indirect(_,_,_) ->
+ failwith("The first symbol in some rule is an indirect call; this is not allowed")
+ end
+ in
+
+ (* Now output the "let-CODE". *)
+ output_code b file_name branch.branch_binding_code;
+ Buffer.add_string b "\n";
+
+ (* Process the other symbols in turn: *)
+ let (_, previous_was_token') =
+ (List.fold_left
+ process_pattern
+ (current_position, previous_was_token)
+ branch.branch_pattern
+ )
+ in
+
+ (* Special case:
+ *
+ * If previous_was_token', we must invoke yy_get_next one more time.
+ * This is deferred until "CODE" is executed to give this code
+ * the chance to make the next token available (in XML, the next token
+ * might come from a different entity, and "CODE" must switch to this
+ * entity).
+ *)
+
+ (* Now output "CODE": *)
+ Buffer.add_string b "let result = \n";
+ output_code b file_name branch.branch_result_code;
+ Buffer.add_string b "\nin\n";
+
+ if previous_was_token' then
+ Buffer.add_string b "ignore(yy_get_next());\nresult\n"
+ else
+ Buffer.add_string b "result\n"
+ in
+
+ (* If we have a ? clause, generate now the "try" statement *)
+ match branch.branch_error_code with
+ None ->
+ Buffer.add_string b "( ";
+ process_inner_branch "";
+ Buffer.add_string b " )";
+ | Some code ->
+
+ (* let position = ref "<label>" in *)
+
+ Buffer.add_string b "let yy_position = ref \"";
+ let current_position =
+ match branch.branch_selector with
+ U_symbol(_,_) -> ""
+ | L_symbol(_,_,None) -> ""
+ | L_symbol(_,_,Some l) -> l
+ | L_indirect(_,_,None) -> ""
+ | L_indirect(_,_,Some l) -> l
+ in
+ Buffer.add_string b current_position;
+ Buffer.add_string b "\" in\n";
+
+ (* The "try" statement: *)
+
+ Buffer.add_string b "( try (\n";
+
+ process_inner_branch current_position;
+
+ Buffer.add_string b "\n) with Parsing.Parse_error -> (\n";
+ output_code b file_name code;
+ Buffer.add_string b "\n))\n"
+;;
+
+
+let process b file_name tree =
+ (* Iterate over the rules and output the parser functions: *)
+ let is_first = ref true in
+ List.iter
+ (fun r ->
+
+ (* Generate the function header: *)
+
+ if !is_first then
+ Buffer.add_string b "let rec "
+ else
+ Buffer.add_string b "and ";
+ is_first := false;
+ Buffer.add_string b "parse_";
+ Buffer.add_string b r.rule_name;
+ Buffer.add_string b " yy_current yy_get_next";
+ List.iter
+ (fun arg -> Buffer.add_string b " ";
+ Buffer.add_string b arg)
+ r.rule_arguments;
+ Buffer.add_string b " =\n";
+
+ (* Generate the "match" statement: *)
+
+ Buffer.add_string b "match yy_current() with\n";
+ let s_done = ref [] in
+ (* s_done: The set of already matched tokens *)
+
+ List.iter
+ (fun branch ->
+ match branch.branch_selector with
+ U_symbol(tok, lab) ->
+ (* A simple token *)
+ if List.mem tok !s_done then begin
+ prerr_endline("WARNING: In rule `" ^ r.rule_name ^
+ "': Match for token `" ^
+ tok ^ "' hidden by previous match");
+ end
+ else
+ if is_typed tree tok then begin
+ match lab with
+ None ->
+ Buffer.add_string b "| ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " _ -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := tok :: !s_done;
+ | Some l ->
+ Buffer.add_string b "| ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " ";
+ Buffer.add_string b l;
+ Buffer.add_string b " -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := tok :: !s_done;
+ end
+ else begin
+ Buffer.add_string b "| ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := tok :: !s_done;
+ end
+ | L_symbol(called_rule, args, lab) ->
+ (* An invocation of a rule *)
+ let s_rule = selector_set_of_rule tree called_rule in
+ let s_rule' =
+ List.filter
+ (fun tok ->
+ if List.mem tok !s_done then begin
+ prerr_endline("WARNING: In rule `" ^ r.rule_name ^
+ "': Match for token `" ^
+ tok ^ "' hidden by previous match");
+ false
+ end
+ else true)
+ s_rule in
+ if s_rule' <> [] then begin
+ Buffer.add_string b "| ( ";
+ let is_first = ref true in
+ List.iter
+ (fun tok ->
+ if not !is_first then
+ Buffer.add_string b " | ";
+ is_first := false;
+ Buffer.add_string b tok;
+ if is_typed tree tok then
+ Buffer.add_string b " _";
+ )
+ s_rule';
+ Buffer.add_string b ") -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := s_rule' @ !s_done;
+ end
+ | L_indirect(ml_name, args, lab) ->
+ (* An invocation of an indirect rule *)
+ failwith("The first symbol in rule `" ^ r.rule_name ^
+ "' is an indirect call; this is not allowed")
+ )
+ r.rule_branches;
+
+ Buffer.add_string b "\n| _ -> raise Not_found\n";
+ )
+ tree.text_rules;
+
+ Buffer.add_string b " in\n"
+;;
+
+
+let count_lines s =
+ (* returns number of lines in s, number of columns of the last line *)
+ let l = String.length s in
+
+ let rec count n k no_cr no_lf =
+ let next_cr =
+ if no_cr then
+ (-1)
+ else
+ try String.index_from s k '\013' with Not_found -> (-1) in
+ let next_lf =
+ if no_lf then
+ (-1)
+ else
+ try String.index_from s k '\010' with Not_found -> (-1) in
+ if next_cr >= 0 & (next_lf < 0 or next_cr < next_lf) then begin
+ if next_cr+1 < l & s.[next_cr+1] = '\010' then
+ count (n+1) (next_cr+2) false (next_lf < 0)
+ else
+ count (n+1) (next_cr+1) false (next_lf < 0)
+ end
+ else if next_lf >= 0 then begin
+ count (n+1) (next_lf+1) (next_cr < 0) false
+ end
+ else
+ n, (l - k)
+
+ in
+ count 0 0 false false
+;;
+
+
+type scan_context =
+ { mutable old_line : int;
+ mutable old_column : int;
+ mutable line : int;
+ mutable column : int;
+ }
+;;
+
+
+let rec next_token context lexbuf =
+ let t = Lexer.scan_file lexbuf in
+ let line = context.line in
+ let column = context.column in
+ context.old_line <- line;
+ context.old_column <- column;
+ let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+ if n_lines > 0 then begin
+ context.line <- line + n_lines;
+ context.column <- n_columns;
+ end
+ else
+ context.column <- column + n_columns;
+ match t with
+ Space -> next_token context lexbuf
+ | Code(s,_,_) -> Code(s,line,column + 2)
+ | Eof -> failwith "Unexpected end of file"
+ | _ -> t
+;;
+
+
+let parse_and_generate ch =
+ let b = Buffer.create 20000 in
+
+ let rec find_sep context lexbuf =
+ let t = Lexer.scan_header lexbuf in
+ let line = context.line in
+ let column = context.column in
+ context.old_line <- line;
+ context.old_column <- column;
+ let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+ if n_lines > 0 then begin
+ context.line <- line + n_lines;
+ context.column <- n_columns;
+ end
+ else
+ context.column <- column + n_columns;
+ match t with
+ Code(s,_,_) ->
+ Buffer.add_string b s;
+ find_sep context lexbuf
+ | Eof -> failwith "Unexpected end of file"
+ | Separator -> ()
+ | _ -> assert false
+ in
+
+ let rec find_rest context lexbuf =
+ let t = Lexer.scan_header lexbuf in
+ let line = context.line in
+ let column = context.column in
+ context.old_line <- line;
+ context.old_column <- column;
+ let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+ if n_lines > 0 then begin
+ context.line <- line + n_lines;
+ context.column <- n_columns;
+ end
+ else
+ context.column <- column + n_columns;
+ match t with
+ Code(s,_,_) ->
+ Buffer.add_string b s;
+ find_rest context lexbuf
+ | Eof -> ()
+ | _ -> assert false
+ in
+
+ (* First read until '%%' *)
+ let lexbuf = Lexing.from_channel ch in
+ let context = { old_line = 0; old_column = 0; line = 1; column = 0 } in
+ let file_name = "stdin" in
+ try
+ output_code_location b file_name ("", 1, 0);
+ find_sep context lexbuf;
+ (* Parse the following text *)
+ let text = (Parser.text (next_token context) lexbuf : Ast.text) in
+ (* Process it: *)
+ process b file_name text;
+ (* Read rest *)
+ output_code_location b file_name ("", context.line, context.column);
+ find_rest context lexbuf;
+ (* Output everything: *)
+ print_string (Buffer.contents b)
+ with
+ any ->
+ Printf.eprintf
+ "Error at line %d column %d: %s\n"
+ context.old_line
+ context.old_column
+ (Printexc.to_string any);
+ exit 1
+;;
+
+
+parse_and_generate stdin;;
+exit 0;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/17 00:33:02 gerd
+ * Bugfix: tok* and tok? work now if tok is an untyped token
+ * without label.
+ *
+ * Revision 1.6 2000/05/14 20:59:24 gerd
+ * Added "phantasy line numbers" to help finding errorneous locations.
+ *
+ * Revision 1.5 2000/05/14 20:41:58 gerd
+ * x: Token? means: if Token is detected x=true else x=false.
+ * x: Token* means: x becomes the number of ocurrences of Token.
+ *
+ * Revision 1.4 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2 2000/05/06 21:51:08 gerd
+ * Numerous bugfixes.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)