2 * ----------------------------------------------------------------------
6 module StringOrd = struct
8 let compare = (compare : string -> string -> int)
11 module StringMap = Map.Make(StringOrd);;
12 (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
16 { mutable edges_out : (string * vertex) list;
17 mutable edges_out_map : vertex StringMap.t;
18 mutable edges_in : (vertex * string) list;
19 mutable graph : graph;
23 { mutable vertexes : vertex list;
24 mutable mid : int; (* maximum id + 1 *)
27 exception Edge_not_unique
37 edges_out_map = StringMap.empty;
42 g.vertexes <- v :: g.vertexes;
46 let new_edge v_from e v_to =
47 if v_from.graph != v_to.graph then
48 invalid_arg "Pxp_dfa.Graph.new_edge";
50 let v = StringMap.find e v_from.edges_out_map in
52 raise Edge_not_unique;
55 v_from.edges_out <- (e, v_to) :: v_from.edges_out;
56 v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
57 v_to.edges_in <- (v_from, e) :: v_to.edges_in;
60 let graph_of_vertex v = v.graph
66 v.id <- v.id + g1.mid;
69 g1.vertexes <- g2.vertexes @ g1.vertexes;
70 g1.mid <- g1.mid + g2.mid;
74 let outgoing_edges v =
81 StringMap.find e v.edges_out_map (* or raise Not_found *)
86 module VertexOrd = struct
89 if v1.Graph.graph != v2.Graph.graph then
90 invalid_arg "Pxp_dfa.VertexOrd.compare";
91 compare v1.Graph.id v2.Graph.id
95 module VertexSet = Set.Make(VertexOrd);;
99 { dfa_graph : Graph.graph;
100 dfa_start : Graph.vertex;
101 dfa_stops : VertexSet.t;
106 (**********************************************************************)
108 (* Now that we have all the auxiliary data types, it is time for the
109 * algorithm that transforms regexps to DFAs.
114 let dfa_of_regexp_content_model re =
118 let g = Graph.create() in
119 let v1 = Graph.new_vertex g in
120 let v2 = Graph.new_vertex g in
121 Graph.new_edge v1 e v2;
124 dfa_stops = VertexSet.singleton v2;
129 invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
132 | Seq (re1 :: seq2) ->
133 let dfa1 = get_dfa re1 in
134 let dfa2 = get_dfa (Seq seq2) in
135 (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
136 Graph.union dfa1.dfa_graph dfa2.dfa_graph;
137 (* Concatenation I: Add additional edges to the graph such
138 * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
139 * empty, w1w2 will match the merged DFAs.
145 Graph.new_edge v e v')
148 (Graph.outgoing_edges dfa2.dfa_start);
149 (* Concatenation II: If the emtpy string matches dfa2, the stop
150 * nodes of dfa1 remain stop nodes.
153 if dfa2.dfa_null then
154 VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
158 (* The resulting DFA: *)
159 { dfa_graph = dfa1.dfa_graph;
160 dfa_start = dfa1.dfa_start;
162 dfa_null = dfa1.dfa_null && dfa2.dfa_null;
166 invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
170 let dfa_alt = List.map get_dfa alt in
171 (* Merge the graphs. The result is in g: *)
172 let g = (List.hd dfa_alt).dfa_graph in
175 Graph.union g dfa.dfa_graph
178 (* Get the new start node: *)
179 let start = Graph.new_vertex g in
180 (* Add the new edges starting at 'start': *)
185 Graph.new_edge start e v)
186 (Graph.outgoing_edges dfa.dfa_start)
189 (* If one of the old start nodes was a stop node, the new start
190 * node will be a stop node, too.
192 let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
195 (fun s dfa -> VertexSet.union s dfa.dfa_stops)
200 VertexSet.union stops (VertexSet.singleton start)
203 (* The resulting DFA: *)
211 let dfa' = get_dfa re' in
212 if dfa'.dfa_null then
216 (* Optimization possible: case ingoing_edges dfa_start = [] *)
217 let start = Graph.new_vertex dfa'.dfa_graph in
220 Graph.new_edge start e v)
221 (Graph.outgoing_edges dfa'.dfa_start);
223 (* The resulting DFA: *)
224 { dfa_graph = dfa'.dfa_graph;
226 dfa_stops = VertexSet.union dfa'.dfa_stops
227 (VertexSet.singleton start);
233 let dfa' = get_dfa re' in
238 Graph.new_edge v e v')
241 (Graph.outgoing_edges dfa'.dfa_start);
243 (* The resulting DFA: *)
244 { dfa_graph = dfa'.dfa_graph;
245 dfa_start = dfa'.dfa_start;
246 dfa_stops = dfa'.dfa_stops;
247 dfa_null = dfa'.dfa_null;
251 get_dfa (Optional (Repeated1 re'))
257 Graph.Edge_not_unique -> raise Not_found
260 (* ======================================================================
264 * Revision 1.1 2000/11/17 09:57:29 lpadovan
267 * Revision 1.1 2000/07/23 02:16:08 gerd