--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module StringOrd = struct
+ type t = string
+ let compare = (compare : string -> string -> int)
+end;;
+
+module StringMap = Map.Make(StringOrd);;
+ (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
+
+module Graph = struct
+ type vertex =
+ { mutable edges_out : (string * vertex) list;
+ mutable edges_out_map : vertex StringMap.t;
+ mutable edges_in : (vertex * string) list;
+ mutable graph : graph;
+ mutable id : int;
+ }
+ and graph =
+ { mutable vertexes : vertex list;
+ mutable mid : int; (* maximum id + 1 *)
+ }
+
+ exception Edge_not_unique
+
+ let create () =
+ { vertexes = [];
+ mid = 0;
+ }
+
+ let new_vertex g =
+ let v =
+ { edges_out = [];
+ edges_out_map = StringMap.empty;
+ edges_in = [];
+ graph = g;
+ id = g.mid;
+ } in
+ g.vertexes <- v :: g.vertexes;
+ g.mid <- g.mid + 1;
+ v
+
+ let new_edge v_from e v_to =
+ if v_from.graph != v_to.graph then
+ invalid_arg "Pxp_dfa.Graph.new_edge";
+ try
+ let v = StringMap.find e v_from.edges_out_map in
+ if v != v_to then
+ raise Edge_not_unique;
+ with
+ Not_found ->
+ v_from.edges_out <- (e, v_to) :: v_from.edges_out;
+ v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
+ v_to.edges_in <- (v_from, e) :: v_to.edges_in;
+ ()
+
+ let graph_of_vertex v = v.graph
+
+ let union g1 g2 =
+ List.iter
+ (fun v ->
+ v.graph <- g1;
+ v.id <- v.id + g1.mid;
+ )
+ g2.vertexes;
+ g1.vertexes <- g2.vertexes @ g1.vertexes;
+ g1.mid <- g1.mid + g2.mid;
+ g2.vertexes <- [];
+ g2.mid <- 0
+
+ let outgoing_edges v =
+ v.edges_out
+
+ let ingoing_edges v =
+ v.edges_in
+
+ let follow_edge v e =
+ StringMap.find e v.edges_out_map (* or raise Not_found *)
+end
+;;
+
+
+module VertexOrd = struct
+ type t = Graph.vertex
+ let compare v1 v2 =
+ if v1.Graph.graph != v2.Graph.graph then
+ invalid_arg "Pxp_dfa.VertexOrd.compare";
+ compare v1.Graph.id v2.Graph.id
+end
+;;
+
+module VertexSet = Set.Make(VertexOrd);;
+
+
+type dfa_definition =
+ { dfa_graph : Graph.graph;
+ dfa_start : Graph.vertex;
+ dfa_stops : VertexSet.t;
+ dfa_null : bool;
+ }
+;;
+
+(**********************************************************************)
+
+(* Now that we have all the auxiliary data types, it is time for the
+ * algorithm that transforms regexps to DFAs.
+ *)
+
+open Pxp_types
+
+let dfa_of_regexp_content_model re =
+ let rec get_dfa re =
+ match re with
+ Child e ->
+ let g = Graph.create() in
+ let v1 = Graph.new_vertex g in
+ let v2 = Graph.new_vertex g in
+ Graph.new_edge v1 e v2;
+ { dfa_graph = g;
+ dfa_start = v1;
+ dfa_stops = VertexSet.singleton v2;
+ dfa_null = false;
+ }
+
+ | Seq [] ->
+ invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+ | Seq [re'] ->
+ get_dfa re'
+ | Seq (re1 :: seq2) ->
+ let dfa1 = get_dfa re1 in
+ let dfa2 = get_dfa (Seq seq2) in
+ (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
+ Graph.union dfa1.dfa_graph dfa2.dfa_graph;
+ (* Concatenation I: Add additional edges to the graph such
+ * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
+ * empty, w1w2 will match the merged DFAs.
+ *)
+ List.iter
+ (fun (e,v') ->
+ VertexSet.iter
+ (fun v ->
+ Graph.new_edge v e v')
+ dfa1.dfa_stops
+ )
+ (Graph.outgoing_edges dfa2.dfa_start);
+ (* Concatenation II: If the emtpy string matches dfa2, the stop
+ * nodes of dfa1 remain stop nodes.
+ *)
+ let stops =
+ if dfa2.dfa_null then
+ VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
+ else
+ dfa2.dfa_stops
+ in
+ (* The resulting DFA: *)
+ { dfa_graph = dfa1.dfa_graph;
+ dfa_start = dfa1.dfa_start;
+ dfa_stops = stops;
+ dfa_null = dfa1.dfa_null && dfa2.dfa_null;
+ }
+
+ | Alt [] ->
+ invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+ | Alt [re'] ->
+ get_dfa re'
+ | Alt alt ->
+ let dfa_alt = List.map get_dfa alt in
+ (* Merge the graphs. The result is in g: *)
+ let g = (List.hd dfa_alt).dfa_graph in
+ List.iter
+ (fun dfa ->
+ Graph.union g dfa.dfa_graph
+ )
+ (List.tl dfa_alt);
+ (* Get the new start node: *)
+ let start = Graph.new_vertex g in
+ (* Add the new edges starting at 'start': *)
+ List.iter
+ (fun dfa ->
+ List.iter
+ (fun (e, v) ->
+ Graph.new_edge start e v)
+ (Graph.outgoing_edges dfa.dfa_start)
+ )
+ dfa_alt;
+ (* If one of the old start nodes was a stop node, the new start
+ * node will be a stop node, too.
+ *)
+ let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
+ let stops =
+ List.fold_left
+ (fun s dfa -> VertexSet.union s dfa.dfa_stops)
+ VertexSet.empty
+ dfa_alt in
+ let stops' =
+ if null then
+ VertexSet.union stops (VertexSet.singleton start)
+ else
+ stops in
+ (* The resulting DFA: *)
+ { dfa_graph = g;
+ dfa_start = start;
+ dfa_stops = stops';
+ dfa_null = null;
+ }
+
+ | Optional re' ->
+ let dfa' = get_dfa re' in
+ if dfa'.dfa_null then
+ (* simple case *)
+ dfa'
+ else begin
+ (* Optimization possible: case ingoing_edges dfa_start = [] *)
+ let start = Graph.new_vertex dfa'.dfa_graph in
+ List.iter
+ (fun (e, v) ->
+ Graph.new_edge start e v)
+ (Graph.outgoing_edges dfa'.dfa_start);
+
+ (* The resulting DFA: *)
+ { dfa_graph = dfa'.dfa_graph;
+ dfa_start = start;
+ dfa_stops = VertexSet.union dfa'.dfa_stops
+ (VertexSet.singleton start);
+ dfa_null = true;
+ }
+ end
+
+ | Repeated1 re' ->
+ let dfa' = get_dfa re' in
+ List.iter
+ (fun (e, v') ->
+ VertexSet.iter
+ (fun v ->
+ Graph.new_edge v e v')
+ dfa'.dfa_stops
+ )
+ (Graph.outgoing_edges dfa'.dfa_start);
+
+ (* The resulting DFA: *)
+ { dfa_graph = dfa'.dfa_graph;
+ dfa_start = dfa'.dfa_start;
+ dfa_stops = dfa'.dfa_stops;
+ dfa_null = dfa'.dfa_null;
+ }
+
+ | Repeated re' ->
+ get_dfa (Optional (Repeated1 re'))
+
+ in
+ try
+ get_dfa re
+ with
+ Graph.Edge_not_unique -> raise Not_found
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/07/23 02:16:08 gerd
+ * Initial revision.
+ *
+ *
+ *)