X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fpxp%2Fpxp_dfa.ml;fp=helm%2FDEVEL%2Fpxp%2Fpxp%2Fpxp_dfa.ml;h=b7baeb4d6feba543998b12e4cf37c371a97db4a0;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/pxp/pxp_dfa.ml b/helm/DEVEL/pxp/pxp/pxp_dfa.ml new file mode 100644 index 000000000..b7baeb4d6 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/pxp_dfa.ml @@ -0,0 +1,271 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +module StringOrd = struct + type t = string + let compare = (compare : string -> string -> int) +end;; + +module StringMap = Map.Make(StringOrd);; + (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *) + +module Graph = struct + type vertex = + { mutable edges_out : (string * vertex) list; + mutable edges_out_map : vertex StringMap.t; + mutable edges_in : (vertex * string) list; + mutable graph : graph; + mutable id : int; + } + and graph = + { mutable vertexes : vertex list; + mutable mid : int; (* maximum id + 1 *) + } + + exception Edge_not_unique + + let create () = + { vertexes = []; + mid = 0; + } + + let new_vertex g = + let v = + { edges_out = []; + edges_out_map = StringMap.empty; + edges_in = []; + graph = g; + id = g.mid; + } in + g.vertexes <- v :: g.vertexes; + g.mid <- g.mid + 1; + v + + let new_edge v_from e v_to = + if v_from.graph != v_to.graph then + invalid_arg "Pxp_dfa.Graph.new_edge"; + try + let v = StringMap.find e v_from.edges_out_map in + if v != v_to then + raise Edge_not_unique; + with + Not_found -> + v_from.edges_out <- (e, v_to) :: v_from.edges_out; + v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map; + v_to.edges_in <- (v_from, e) :: v_to.edges_in; + () + + let graph_of_vertex v = v.graph + + let union g1 g2 = + List.iter + (fun v -> + v.graph <- g1; + v.id <- v.id + g1.mid; + ) + g2.vertexes; + g1.vertexes <- g2.vertexes @ g1.vertexes; + g1.mid <- g1.mid + g2.mid; + g2.vertexes <- []; + g2.mid <- 0 + + let outgoing_edges v = + v.edges_out + + let ingoing_edges v = + v.edges_in + + let follow_edge v e = + StringMap.find e v.edges_out_map (* or raise Not_found *) +end +;; + + +module VertexOrd = struct + type t = Graph.vertex + let compare v1 v2 = + if v1.Graph.graph != v2.Graph.graph then + invalid_arg "Pxp_dfa.VertexOrd.compare"; + compare v1.Graph.id v2.Graph.id +end +;; + +module VertexSet = Set.Make(VertexOrd);; + + +type dfa_definition = + { dfa_graph : Graph.graph; + dfa_start : Graph.vertex; + dfa_stops : VertexSet.t; + dfa_null : bool; + } +;; + +(**********************************************************************) + +(* Now that we have all the auxiliary data types, it is time for the + * algorithm that transforms regexps to DFAs. + *) + +open Pxp_types + +let dfa_of_regexp_content_model re = + let rec get_dfa re = + match re with + Child e -> + let g = Graph.create() in + let v1 = Graph.new_vertex g in + let v2 = Graph.new_vertex g in + Graph.new_edge v1 e v2; + { dfa_graph = g; + dfa_start = v1; + dfa_stops = VertexSet.singleton v2; + dfa_null = false; + } + + | Seq [] -> + invalid_arg "Pxp_dfa.dfa_of_regexp_content_model" + | Seq [re'] -> + get_dfa re' + | Seq (re1 :: seq2) -> + let dfa1 = get_dfa re1 in + let dfa2 = get_dfa (Seq seq2) in + (* Merge the two graphs. The result is in dfa1.dfa_graph: *) + Graph.union dfa1.dfa_graph dfa2.dfa_graph; + (* Concatenation I: Add additional edges to the graph such + * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not + * empty, w1w2 will match the merged DFAs. + *) + List.iter + (fun (e,v') -> + VertexSet.iter + (fun v -> + Graph.new_edge v e v') + dfa1.dfa_stops + ) + (Graph.outgoing_edges dfa2.dfa_start); + (* Concatenation II: If the emtpy string matches dfa2, the stop + * nodes of dfa1 remain stop nodes. + *) + let stops = + if dfa2.dfa_null then + VertexSet.union dfa1.dfa_stops dfa2.dfa_stops + else + dfa2.dfa_stops + in + (* The resulting DFA: *) + { dfa_graph = dfa1.dfa_graph; + dfa_start = dfa1.dfa_start; + dfa_stops = stops; + dfa_null = dfa1.dfa_null && dfa2.dfa_null; + } + + | Alt [] -> + invalid_arg "Pxp_dfa.dfa_of_regexp_content_model" + | Alt [re'] -> + get_dfa re' + | Alt alt -> + let dfa_alt = List.map get_dfa alt in + (* Merge the graphs. The result is in g: *) + let g = (List.hd dfa_alt).dfa_graph in + List.iter + (fun dfa -> + Graph.union g dfa.dfa_graph + ) + (List.tl dfa_alt); + (* Get the new start node: *) + let start = Graph.new_vertex g in + (* Add the new edges starting at 'start': *) + List.iter + (fun dfa -> + List.iter + (fun (e, v) -> + Graph.new_edge start e v) + (Graph.outgoing_edges dfa.dfa_start) + ) + dfa_alt; + (* If one of the old start nodes was a stop node, the new start + * node will be a stop node, too. + *) + let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in + let stops = + List.fold_left + (fun s dfa -> VertexSet.union s dfa.dfa_stops) + VertexSet.empty + dfa_alt in + let stops' = + if null then + VertexSet.union stops (VertexSet.singleton start) + else + stops in + (* The resulting DFA: *) + { dfa_graph = g; + dfa_start = start; + dfa_stops = stops'; + dfa_null = null; + } + + | Optional re' -> + let dfa' = get_dfa re' in + if dfa'.dfa_null then + (* simple case *) + dfa' + else begin + (* Optimization possible: case ingoing_edges dfa_start = [] *) + let start = Graph.new_vertex dfa'.dfa_graph in + List.iter + (fun (e, v) -> + Graph.new_edge start e v) + (Graph.outgoing_edges dfa'.dfa_start); + + (* The resulting DFA: *) + { dfa_graph = dfa'.dfa_graph; + dfa_start = start; + dfa_stops = VertexSet.union dfa'.dfa_stops + (VertexSet.singleton start); + dfa_null = true; + } + end + + | Repeated1 re' -> + let dfa' = get_dfa re' in + List.iter + (fun (e, v') -> + VertexSet.iter + (fun v -> + Graph.new_edge v e v') + dfa'.dfa_stops + ) + (Graph.outgoing_edges dfa'.dfa_start); + + (* The resulting DFA: *) + { dfa_graph = dfa'.dfa_graph; + dfa_start = dfa'.dfa_start; + dfa_stops = dfa'.dfa_stops; + dfa_null = dfa'.dfa_null; + } + + | Repeated re' -> + get_dfa (Optional (Repeated1 re')) + + in + try + get_dfa re + with + Graph.Edge_not_unique -> raise Not_found +;; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:29 lpadovan + * Initial revision + * + * Revision 1.1 2000/07/23 02:16:08 gerd + * Initial revision. + * + * + *)