+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-module StringOrd = struct
- type t = string
- let compare = (compare : string -> string -> int)
-end;;
-
-module StringMap = Map.Make(StringOrd);;
- (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
-
-module Graph = struct
- type vertex =
- { mutable edges_out : (string * vertex) list;
- mutable edges_out_map : vertex StringMap.t;
- mutable edges_in : (vertex * string) list;
- mutable graph : graph;
- mutable id : int;
- }
- and graph =
- { mutable vertexes : vertex list;
- mutable mid : int; (* maximum id + 1 *)
- }
-
- exception Edge_not_unique
-
- let create () =
- { vertexes = [];
- mid = 0;
- }
-
- let new_vertex g =
- let v =
- { edges_out = [];
- edges_out_map = StringMap.empty;
- edges_in = [];
- graph = g;
- id = g.mid;
- } in
- g.vertexes <- v :: g.vertexes;
- g.mid <- g.mid + 1;
- v
-
- let new_edge v_from e v_to =
- if v_from.graph != v_to.graph then
- invalid_arg "Pxp_dfa.Graph.new_edge";
- try
- let v = StringMap.find e v_from.edges_out_map in
- if v != v_to then
- raise Edge_not_unique;
- with
- Not_found ->
- v_from.edges_out <- (e, v_to) :: v_from.edges_out;
- v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
- v_to.edges_in <- (v_from, e) :: v_to.edges_in;
- ()
-
- let graph_of_vertex v = v.graph
-
- let union g1 g2 =
- List.iter
- (fun v ->
- v.graph <- g1;
- v.id <- v.id + g1.mid;
- )
- g2.vertexes;
- g1.vertexes <- g2.vertexes @ g1.vertexes;
- g1.mid <- g1.mid + g2.mid;
- g2.vertexes <- [];
- g2.mid <- 0
-
- let outgoing_edges v =
- v.edges_out
-
- let ingoing_edges v =
- v.edges_in
-
- let follow_edge v e =
- StringMap.find e v.edges_out_map (* or raise Not_found *)
-end
-;;
-
-
-module VertexOrd = struct
- type t = Graph.vertex
- let compare v1 v2 =
- if v1.Graph.graph != v2.Graph.graph then
- invalid_arg "Pxp_dfa.VertexOrd.compare";
- compare v1.Graph.id v2.Graph.id
-end
-;;
-
-module VertexSet = Set.Make(VertexOrd);;
-
-
-type dfa_definition =
- { dfa_graph : Graph.graph;
- dfa_start : Graph.vertex;
- dfa_stops : VertexSet.t;
- dfa_null : bool;
- }
-;;
-
-(**********************************************************************)
-
-(* Now that we have all the auxiliary data types, it is time for the
- * algorithm that transforms regexps to DFAs.
- *)
-
-open Pxp_types
-
-let dfa_of_regexp_content_model re =
- let rec get_dfa re =
- match re with
- Child e ->
- let g = Graph.create() in
- let v1 = Graph.new_vertex g in
- let v2 = Graph.new_vertex g in
- Graph.new_edge v1 e v2;
- { dfa_graph = g;
- dfa_start = v1;
- dfa_stops = VertexSet.singleton v2;
- dfa_null = false;
- }
-
- | Seq [] ->
- invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
- | Seq [re'] ->
- get_dfa re'
- | Seq (re1 :: seq2) ->
- let dfa1 = get_dfa re1 in
- let dfa2 = get_dfa (Seq seq2) in
- (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
- Graph.union dfa1.dfa_graph dfa2.dfa_graph;
- (* Concatenation I: Add additional edges to the graph such
- * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
- * empty, w1w2 will match the merged DFAs.
- *)
- List.iter
- (fun (e,v') ->
- VertexSet.iter
- (fun v ->
- Graph.new_edge v e v')
- dfa1.dfa_stops
- )
- (Graph.outgoing_edges dfa2.dfa_start);
- (* Concatenation II: If the emtpy string matches dfa2, the stop
- * nodes of dfa1 remain stop nodes.
- *)
- let stops =
- if dfa2.dfa_null then
- VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
- else
- dfa2.dfa_stops
- in
- (* The resulting DFA: *)
- { dfa_graph = dfa1.dfa_graph;
- dfa_start = dfa1.dfa_start;
- dfa_stops = stops;
- dfa_null = dfa1.dfa_null && dfa2.dfa_null;
- }
-
- | Alt [] ->
- invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
- | Alt [re'] ->
- get_dfa re'
- | Alt alt ->
- let dfa_alt = List.map get_dfa alt in
- (* Merge the graphs. The result is in g: *)
- let g = (List.hd dfa_alt).dfa_graph in
- List.iter
- (fun dfa ->
- Graph.union g dfa.dfa_graph
- )
- (List.tl dfa_alt);
- (* Get the new start node: *)
- let start = Graph.new_vertex g in
- (* Add the new edges starting at 'start': *)
- List.iter
- (fun dfa ->
- List.iter
- (fun (e, v) ->
- Graph.new_edge start e v)
- (Graph.outgoing_edges dfa.dfa_start)
- )
- dfa_alt;
- (* If one of the old start nodes was a stop node, the new start
- * node will be a stop node, too.
- *)
- let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
- let stops =
- List.fold_left
- (fun s dfa -> VertexSet.union s dfa.dfa_stops)
- VertexSet.empty
- dfa_alt in
- let stops' =
- if null then
- VertexSet.union stops (VertexSet.singleton start)
- else
- stops in
- (* The resulting DFA: *)
- { dfa_graph = g;
- dfa_start = start;
- dfa_stops = stops';
- dfa_null = null;
- }
-
- | Optional re' ->
- let dfa' = get_dfa re' in
- if dfa'.dfa_null then
- (* simple case *)
- dfa'
- else begin
- (* Optimization possible: case ingoing_edges dfa_start = [] *)
- let start = Graph.new_vertex dfa'.dfa_graph in
- List.iter
- (fun (e, v) ->
- Graph.new_edge start e v)
- (Graph.outgoing_edges dfa'.dfa_start);
-
- (* The resulting DFA: *)
- { dfa_graph = dfa'.dfa_graph;
- dfa_start = start;
- dfa_stops = VertexSet.union dfa'.dfa_stops
- (VertexSet.singleton start);
- dfa_null = true;
- }
- end
-
- | Repeated1 re' ->
- let dfa' = get_dfa re' in
- List.iter
- (fun (e, v') ->
- VertexSet.iter
- (fun v ->
- Graph.new_edge v e v')
- dfa'.dfa_stops
- )
- (Graph.outgoing_edges dfa'.dfa_start);
-
- (* The resulting DFA: *)
- { dfa_graph = dfa'.dfa_graph;
- dfa_start = dfa'.dfa_start;
- dfa_stops = dfa'.dfa_stops;
- dfa_null = dfa'.dfa_null;
- }
-
- | Repeated re' ->
- get_dfa (Optional (Repeated1 re'))
-
- in
- try
- get_dfa re
- with
- Graph.Edge_not_unique -> raise Not_found
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/07/23 02:16:08 gerd
- * Initial revision.
- *
- *
- *)