From: Claudio Sacerdoti Coen Date: Tue, 23 Sep 2003 16:10:16 +0000 (+0000) Subject: This version of xmlDiff is much much much smarter than the previous one: X-Git-Tag: V_0_4_3_4~14 X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=commitdiff_plain;h=4d9ab21899b5e0064ce04a0da9676fa846686b77;p=helm.git This version of xmlDiff is much much much smarter than the previous one: with the forthcoming version of gdome the performances will be doubled. [ Unfortunately, it is still deadly slow. ] --- diff --git a/helm/gTopLevel/xmlDiff.ml b/helm/gTopLevel/xmlDiff.ml index 2b9806c44..cd19beb0c 100644 --- a/helm/gTopLevel/xmlDiff.ml +++ b/helm/gTopLevel/xmlDiff.ml @@ -23,13 +23,27 @@ * http://cs.unibo.it/helm/. *) -let myprerr_endline = prerr_endline;; -let prerr_endline _ = ();; - let mathmlns = "http://www.w3.org/1998/Math/MathML";; let xmldiffns = "http://helm.cs.unibo.it/XmlDiff";; let helmns = "http://www.cs.unibo.it/helm";; +let ds_selection = Gdome.domString "selection";; +let ds_2 = Gdome.domString "2";; +let ds_mathmlns = Gdome.domString mathmlns;; +let ds_m_style = Gdome.domString "m:mstyle";; +let ds_mathbackground = Gdome.domString "mathbackground";; +let ds_xmldiffns = Gdome.domString xmldiffns;; +let ds_xmldiff_type = Gdome.domString "xmldiff:type";; +let ds_fake = Gdome.domString "fake";; +let ds_helmns = Gdome.domString helmns;; +let ds_xref = Gdome.domString "xref";; +let ds_type = Gdome.domString "type";; +let ds_yellow = Gdome.domString "yellow";; +let ds_green = Gdome.domString "#00ff00";; +let ds_maction = Gdome.domString "maction";; +let ds_mtr = Gdome.domString "mtr";; +let ds_mtd = Gdome.domString "mtd";; + type highlighted_nodes = Gdome.node list;; let rec make_visible (n: Gdome.node) = @@ -38,27 +52,28 @@ let rec make_visible (n: Gdome.node) = | Some p -> match p#get_namespaceURI, p#get_localName with Some nu, Some ln when - nu#to_string = mathmlns && ln#to_string = "maction" -> + nu#equals ds_mathmlns && ln#equals ds_maction -> (new Gdome.element_of_node p)#setAttribute - ~name:(Gdome.domString "selection") - ~value:(Gdome.domString "2") ; + ~name:ds_selection + ~value:ds_2 ; make_visible p | _,_ -> make_visible p ;; -let highlight_node ?(color="yellow") (doc: Gdome.document) (n: Gdome.node) = +let highlight_node_total_time = ref 0.0;; + +let highlight_node ?(color=ds_yellow) (doc: Gdome.document) (n: Gdome.node) = let highlight (n: Gdome.node) = let highlighter = doc#createElementNS - ~namespaceURI:(Some (Gdome.domString mathmlns)) - ~qualifiedName:(Gdome.domString "m:mstyle") + ~namespaceURI:(Some ds_mathmlns) + ~qualifiedName:ds_m_style in - highlighter#setAttribute ~name:(Gdome.domString "mathbackground") - ~value:(Gdome.domString color) ; + highlighter#setAttribute ~name:ds_mathbackground ~value:color ; highlighter#setAttributeNS - ~namespaceURI:(Some (Gdome.domString xmldiffns)) - ~qualifiedName:(Gdome.domString "xmldiff:type") - ~value:(Gdome.domString "fake") ; + ~namespaceURI:(Some ds_xmldiffns) + ~qualifiedName:ds_xmldiff_type + ~value:ds_fake ; let parent = match n#get_parentNode with None -> assert false @@ -72,22 +87,18 @@ let highlight_node ?(color="yellow") (doc: Gdome.document) (n: Gdome.node) = let rec find_mstylable_node n = match n#get_namespaceURI, n#get_localName with Some nu, Some ln when - nu#to_string = mathmlns && - let ln = ln#to_string in - ln <> "mtr" && ln <> "mtd" -> n + nu#equals ds_mathmlns && + (not (ln#equals ds_mtr)) && (not (ln#equals ds_mtd)) -> n | Some nu, Some ln when - nu#to_string = mathmlns && - let ln = ln#to_string in - ln = "mtr" || ln = "mtd" -> -prerr_endline "@@@ find_mstylable_node scendo"; - let true_child = - match n#get_firstChild with - None -> assert false - | Some n -> n - in - find_mstylable_node true_child + nu#equals ds_mathmlns && + ln#equals ds_mtr || ln#equals ds_mtd -> + let true_child = + match n#get_firstChild with + None -> assert false + | Some n -> n + in + find_mstylable_node true_child | _,_ -> -prerr_endline ("@@@ find_mstylable_node salgo da " ^ match n#get_localName with Some n -> n#to_string | None -> "_") ; match n#get_parentNode with None -> assert false | Some p -> find_mstylable_node p @@ -112,14 +123,12 @@ let iter_children ~f (n:Gdome.node) = let highlight_nodes ~xrefs (doc:Gdome.document) = let highlighted = ref [] in let rec aux (n:Gdome.element) = - if - List.mem - (n#getAttributeNS ~namespaceURI:(Gdome.domString helmns) - ~localName:(Gdome.domString "xref"))#to_string - xrefs - then + let attributeNS = + (n#getAttributeNS ~namespaceURI:ds_helmns + ~localName:ds_xref)#to_string in + if List.mem attributeNS xrefs then highlighted := - (highlight_node ~color:"#00ff00"(* light green *) doc (n :> Gdome.node)):: + (highlight_node ~color:ds_green doc (n :> Gdome.node)):: !highlighted ; iter_children (n :> Gdome.node) ~f:(function n -> @@ -136,8 +145,8 @@ let dim_nodes = assert (n#get_nodeType = GdomeNodeTypeT.ELEMENT_NODE && ((new Gdome.element_of_node n)#getAttributeNS - ~namespaceURI:(Gdome.domString xmldiffns) - ~localName:(Gdome.domString "type"))#to_string = "fake") ; + ~namespaceURI:ds_xmldiffns + ~localName:ds_type)#equals ds_fake) ; let true_child = match n#get_firstChild with None -> assert false @@ -153,254 +162,181 @@ let dim_nodes = let update_dom ~(from : Gdome.document) (d : Gdome.document) = let rec aux (p: Gdome.node) (f: Gdome.node) (t: Gdome.node) = -(* Perche' non andava? - if f#get_localName = t#get_localName && - f#get_namespaceURI = t#get_namespaceURI -*) + let replace t1 = + if + t1 = GdomeNodeTypeT.ELEMENT_NODE && + ((new Gdome.element_of_node f)#getAttributeNS + ~namespaceURI:ds_xmldiffns + ~localName:ds_type)#equals ds_fake + then + let true_child = + match f#get_firstChild with + None -> assert false + | Some n -> n + in + begin + ignore (p#replaceChild ~oldChild:f ~newChild:true_child) ; + aux p true_child t + end + else + let t' = from#importNode t true in + ignore (p#replaceChild ~newChild:t' ~oldChild:f) ; + ignore (highlight_node from t') + in match - f#get_nodeType,t#get_nodeType, - f#get_namespaceURI,t#get_namespaceURI, - f#get_localName,t#get_localName + f#get_nodeType,t#get_nodeType with - GdomeNodeTypeT.TEXT_NODE,GdomeNodeTypeT.TEXT_NODE,_,_,_,_ when - match f#get_nodeValue, t#get_nodeValue with - Some v, Some v' -> v#to_string = v'#to_string - | _,_ -> assert false - -> -prerr_endline ("XML_DIFF: preservo il nodo testo " ^ match f#get_nodeValue with Some v -> v#to_string | None -> assert false); - () - | GdomeNodeTypeT.ELEMENT_NODE,GdomeNodeTypeT.ELEMENT_NODE, - Some nu, Some nu', Some ln, Some ln' when - ln#to_string = ln'#to_string && nu#to_string = nu'#to_string -> -prerr_endline ("XML_DIFF: preservo il nodo "^ nu#to_string ^ ":" ^ln#to_string); - begin - match f#get_attributes, t#get_attributes with - Some fattrs, Some tattrs -> - let flen = fattrs#get_length in - let tlen = tattrs#get_length in - let processed = ref [] in - for i = 0 to flen -1 do - match fattrs#item i with - None -> () (* CSC: sigh, togliere un nodo rompe fa decrescere la lunghezza ==> passare a un while *) - | Some attr -> - match attr#get_namespaceURI with - None -> - (* Back to DOM Level 1 ;-( *) - begin - let name = attr#get_nodeName in - match tattrs#getNamedItem ~name with - None -> -prerr_endline ("XML_DIFF: DOM 1; rimuovo l'attributo " ^ name#to_string); -(* CSC: questo non mi toglieva solo l'attributo, ma anche altri nodi qui - e la' ;-( - ignore (f#removeChild attr) -*) - ignore (fattrs#removeNamedItem ~name) - | Some attr' -> - processed := - (None,Some name)::!processed ; - match attr#get_nodeValue, attr'#get_nodeValue with - Some v1, Some v2 when - v1#to_string = v2#to_string - || (name#to_string = "selection" && - nu#to_string = - "http://www.w3.org/1998/Math/MathML" && - ln#to_string = "maction") - -> -prerr_endline ("XML_DIFF: DOM 1; preservo l'attributo " ^ name#to_string); - () - | Some v1, Some v2 -> -prerr_endline ("XML_DIFF: DOM 1; rimpiazzo l'attributo " ^ name#to_string ^ ": old value=" ^ v1#to_string ^ ", new value=" ^ v2#to_string); - let attr'' = from#importNode attr' true in - ignore (fattrs#setNamedItem attr'') - | _,_ -> assert false - end - | Some namespaceURI -> - let localName = - match attr#get_localName with - Some v -> v - | None -> assert false - in - match - tattrs#getNamedItemNS ~namespaceURI ~localName - with - None -> -prerr_endline ("XML_DIFF: DOM 2; rimuovo l'attributo " ^ namespaceURI#to_string ^ ":" ^ localName#to_string); -(* CSC: questo non mi toglieva solo l'attributo, ma anche altri nodi qui - e la' ;-( - ignore (f#removeChild attr) -*) - ignore - (fattrs#removeNamedItemNS - ~namespaceURI ~localName) - | Some attr' -> - processed := - (Some namespaceURI,Some localName)::!processed ; - (*CSC: questo mi dice read-only ;-( - attr#set_nodeValue attr'#get_nodeValue *) - (*CSC: questo mi abortisce con una assert - let attr'' = from#importNode attr' true in - ignore(f#replaceChild ~newChild:attr'' ~oldChild:attr)*) - match attr#get_nodeValue, attr'#get_nodeValue with - Some v1, Some v2 when - v1#to_string = v2#to_string -> -prerr_endline ("XML_DIFF: DOM 2; preservo l'attributo " ^ namespaceURI#to_string ^ ":" ^ localName#to_string); - () - | Some _, Some _ -> -prerr_endline ("XML_DIFF: DOM 2; rimpiazzo l'attributo " ^ namespaceURI#to_string ^ ":" ^ localName#to_string); - let attr'' = from#importNode attr' true in - ignore (fattrs#setNamedItem attr'') - | _,_ -> assert false - done ; - for i = 0 to tlen -1 do - match tattrs#item i with - None -> assert false - | Some attr -> -let debugs = ref "" in - let namespaceURI,localName = - match attr#get_namespaceURI with - None -> -debugs := ("XML_DIFF: DOM 1; creo un nuovo attributo " ^ attr#get_nodeName#to_string) ; - None,attr#get_nodeName - | Some namespaceURI as v -> -debugs := ("XML_DIFF: DOM 2; creo un nuovo attributo " ^ namespaceURI#to_string ^ ":" ^ match attr#get_localName with Some v -> v#to_string | None -> assert false); - v, match attr#get_localName with - None -> assert false - | Some v -> v - in - if - not - (List.exists - (function - None,Some localName' -> - (match namespaceURI with - None -> - localName#to_string = localName'#to_string - | Some _ -> false) - | Some namespaceURI', Some localName' -> - (match namespaceURI with - None -> false - | Some namespaceURI -> - localName#to_string = localName'#to_string && - namespaceURI#to_string=namespaceURI'#to_string - ) - | _,_ -> assert false - ) !processed) - then - let attr' = from#importNode attr false in -prerr_endline !debugs ; - ignore (fattrs#setNamedItem attr') - done - | _,_ -> assert false - end ; -(* debugging only - let fchildren = f#get_childNodes in - let tchildren = t#get_childNodes in -*) - let rec dumb_diff = - function - [],[] -> () - | he1::tl1,he2::tl2 -> -prerr_endline "XML_DIFF: processo una coppia di figli" ; - aux f he1 he2 ; - dumb_diff (tl1,tl2) - | [],tl2 -> -prerr_endline ("XML_DIFF: appendo i nodi residui " ^ String.concat ", " (List.map (function n -> match n#get_localName with Some s -> s#to_string | None -> "#" ^ (match n#get_nodeValue with Some s -> s#to_string | None -> "_") ^ "#") tl2)) ; - List.iter - (function n -> - let n' = from#importNode n true in - ignore (f#appendChild n') ; - ignore (highlight_node from n') - ) tl2 - | tl1,[] -> -prerr_endline ("XML_DIFF: cancello i nodi residui " ^ String.concat ", " (List.map (function n -> match n#get_localName with Some s -> s#to_string | None -> "_") tl1)) ; - List.iter (function n -> ignore (f#removeChild n)) tl1 - in - let node_list_of_nodeList n = - let rec aux = - function - None -> [] - | Some n when - n#get_nodeType = GdomeNodeTypeT.ELEMENT_NODE - or n#get_nodeType = GdomeNodeTypeT.TEXT_NODE -> - n::(aux n#get_nextSibling) - | Some n -> -prerr_endline ("XML_DIFF: mi sto perdendo i nodi di tipo " ^ string_of_int (Obj.magic n#get_nodeType)) ; - aux n#get_nextSibling - in - aux n#get_firstChild + GdomeNodeTypeT.TEXT_NODE,GdomeNodeTypeT.TEXT_NODE -> + (match f#get_nodeValue, t#get_nodeValue with + Some v, Some v' when v#equals v' -> () + | Some _, (Some _ as v') -> f#set_nodeValue v' + | _,_ -> assert false) + | GdomeNodeTypeT.ELEMENT_NODE as t1,GdomeNodeTypeT.ELEMENT_NODE -> + (match + f#get_namespaceURI,t#get_namespaceURI,f#get_localName,t#get_localName + with + Some nu, Some nu', Some ln, Some ln' when + ln#equals ln' && nu#equals nu' -> + begin + match f#get_attributes, t#get_attributes with + Some fattrs, Some tattrs -> + let flen = fattrs#get_length in + let tlen = tattrs#get_length in + let processed = ref [] in + for i = 0 to flen -1 do + match fattrs#item i with + None -> () (* CSC: sigh, togliere un nodo rompe fa decrescere la lunghezza ==> passare a un while *) + | Some attr -> + match attr#get_namespaceURI with + None -> + (* Back to DOM Level 1 ;-( *) + begin + let name = attr#get_nodeName in + match tattrs#getNamedItem ~name with + None -> + ignore (fattrs#removeNamedItem ~name) + | Some attr' -> + processed := + (None,Some name)::!processed ; + match attr#get_nodeValue, attr'#get_nodeValue with + Some v1, Some v2 when + v1#equals v2 + || (name#equals ds_selection && + nu#equals ds_mathmlns && + ln#equals ds_maction) + -> + () + | Some v1, Some v2 -> + let attr'' = from#importNode attr' true in + ignore (fattrs#setNamedItem attr'') + | _,_ -> assert false + end + | Some namespaceURI -> + let localName = + match attr#get_localName with + Some v -> v + | None -> assert false + in + match + tattrs#getNamedItemNS ~namespaceURI ~localName + with + None -> + ignore + (fattrs#removeNamedItemNS + ~namespaceURI ~localName) + | Some attr' -> + processed := + (Some namespaceURI,Some localName)::!processed ; + match attr#get_nodeValue, attr'#get_nodeValue with + Some v1, Some v2 when + v1#equals v2 -> + () + | Some _, Some _ -> + let attr'' = from#importNode attr' true in + ignore (fattrs#setNamedItem attr'') + | _,_ -> assert false + done ; + for i = 0 to tlen -1 do + match tattrs#item i with + None -> assert false + | Some attr -> + let namespaceURI,localName = + match attr#get_namespaceURI with + None -> + None,attr#get_nodeName + | Some namespaceURI as v -> + v, match attr#get_localName with + None -> assert false + | Some v -> v + in + if + not + (List.exists + (function + None,Some localName' -> + (match namespaceURI with + None -> + localName#equals localName' + | Some _ -> false) + | Some namespaceURI', Some localName' -> + (match namespaceURI with + None -> false + | Some namespaceURI -> + localName#equals localName' && + namespaceURI#equals namespaceURI' + ) + | _,_ -> assert false + ) !processed) + then + let attr' = from#importNode attr false in + ignore (fattrs#setNamedItem attr') + done + | _,_ -> assert false + end ; + let rec dumb_diff = + function + [],[] -> () + | he1::tl1,he2::tl2 -> + aux f he1 he2 ; + dumb_diff (tl1,tl2) + | [],tl2 -> + List.iter + (function n -> + let n' = from#importNode n true in + ignore (f#appendChild n') ; + ignore (highlight_node from n') + ) tl2 + | tl1,[] -> + List.iter (function n -> ignore (f#removeChild n)) tl1 in -(* debugging stuff only -for i = 0 to fchildren#get_length - 1 do -match fchildren#item i with -None -> prerr_endline "EUREKA: ma siamo matti?" -|Some n -> -match n#get_nodeType with GdomeNodeTypeT.ATTRIBUTE_NODE -> prerr_endline "EUREKA attr" | GdomeNodeTypeT.CDATA_SECTION_NODE -> prerr_endline "EUREKA text" | GdomeNodeTypeT.DOCUMENT_FRAGMENT_NODE -> prerr_endline "EUREKA document fragment" | _ -> prerr_endline ("EUREKA " ^ string_of_int (Obj.magic n#get_nodeType)) -done ; -prerr_endline("XML_DIFF: from ha " ^ string_of_int fchildren#get_length ^ " nodi, ovvero " ^ string_of_int (List.length (node_list_of_nodeList f)) ^ " figli"); -prerr_endline("XML_DIFF: to ha " ^ string_of_int tchildren#get_length ^ " nodi, ovvero " ^ string_of_int (List.length (node_list_of_nodeList t)) ^ " figli"); -prerr_endline ("XML_DIFF: chiamo dumb_diff su " ^ String.concat ", " (List.map (function n -> match n#get_localName with Some s -> s#to_string | None -> "_") (node_list_of_nodeList fchildren)) ^ " e " ^ String.concat ", " (List.map (function n -> match n#get_localName with Some s -> s#to_string | None -> "_") (node_list_of_nodeList tchildren))) ; -*) - dumb_diff - (node_list_of_nodeList f, node_list_of_nodeList t) - | t1,t2,_,_,_,_ when + let node_list_of_nodeList n = + let rec aux = + function + None -> [] + | Some n when + n#get_nodeType = GdomeNodeTypeT.ELEMENT_NODE + or n#get_nodeType = GdomeNodeTypeT.TEXT_NODE -> + n::(aux n#get_nextSibling) + | Some n -> + aux n#get_nextSibling + in + aux n#get_firstChild + in + dumb_diff + (node_list_of_nodeList f, node_list_of_nodeList t) + | _,_,_,_ -> replace t1 + ) + | t1,t2 when (t1 = GdomeNodeTypeT.ELEMENT_NODE || t1 = GdomeNodeTypeT.TEXT_NODE) && (t2 = GdomeNodeTypeT.ELEMENT_NODE || t2 = GdomeNodeTypeT.TEXT_NODE) -> - if - t1 = GdomeNodeTypeT.ELEMENT_NODE && - ((new Gdome.element_of_node f)#getAttributeNS - ~namespaceURI:(Gdome.domString xmldiffns) - ~localName:(Gdome.domString "type"))#to_string = "fake" - then - let true_child = - match f#get_firstChild with - None -> assert false - | Some n -> n - in - begin -prerr_endline ("%%% CANCELLO HIGHLIGHTER " ^ (match f#get_localName with Some s -> s#to_string | None -> "_") ^ " CON DENTRO " ^ (match true_child#get_localName with Some s -> s#to_string | None -> "_")) ; - ignore (p#replaceChild ~oldChild:f ~newChild:true_child) ; - aux p true_child t - end - else - let t' = from#importNode t true in -prerr_endline ("%%% XML_DIFF: importo il nodo " ^ match t'#get_localName with Some n -> n#to_string | None -> "_") ; -(* -prerr_endline ("Rimpiazzo" ^ (match f#get_namespaceURI with Some s -> s#to_string) ^ ":" ^ (match f#get_localName with Some s -> s#to_string) ^ " con " ^ (match t#get_namespaceURI with Some s -> s#to_string) ^ ":" ^ (match t#get_localName with Some s -> s#to_string) ^ " in " ^ (match p#get_localName with Some s -> s#to_string | None -> "_")) ; -*) - ignore (p#replaceChild ~newChild:t' ~oldChild:f) ; - ignore (highlight_node from t') - | _,_,_,_,_,_ -> assert false + replace t1 + | _,_ -> assert false in try aux (d :> Gdome.node) (from#get_documentElement :> Gdome.node) (d#get_documentElement :> Gdome.node) with - (GdomeInit.DOMException (e,msg) as ex) -> -(* CSC: Non si puo' per problemi di linking ;-( - let module E = GdomeDOMExceptionT in -*) - prerr_endline - ("DOM EXCEPTION: " ^ msg ^ " --- " ^ - match e with - GdomeDOMExceptionT.NO_ERR -> "NO_ERR" - | GdomeDOMExceptionT.INDEX_SIZE_ERR -> "INDEX_SIZE_ERR" - | GdomeDOMExceptionT.DOMSTRING_SIZE_ERR -> "DOMSTRING_SIZE_ERR" - | GdomeDOMExceptionT.HIERARCHY_REQUEST_ERR -> "HIERARCHY_REQUEST_ERR" - | GdomeDOMExceptionT.WRONG_DOCUMENT_ERR -> "WRONG_DOCUMENT_ERR" - | GdomeDOMExceptionT.INVALID_CHARACTER_ERR -> "INVALID_CHARACTER_ERR" - | GdomeDOMExceptionT.NO_DATA_ALLOWED_ERR -> "NO_DATA_ALLOWER_ERR" - | GdomeDOMExceptionT.NO_MODIFICATION_ALLOWED_ERR -> "NO_MODIFICATION_ALLOWED_ERR" - | GdomeDOMExceptionT.NOT_FOUND_ERR -> "NOT_FOUND_ERR" - | GdomeDOMExceptionT.NOT_SUPPORTED_ERR -> "NOT_SUPPORTED_ERR" - | GdomeDOMExceptionT.INUSE_ATTRIBUTE_ERR -> "INUSE_ATTRIBUTE_ERR" - | GdomeDOMExceptionT.INVALID_STATE_ERR -> "INVALID_STATE_ERR" - | GdomeDOMExceptionT.SYNTAX_ERR -> "SYNTAX_ERR" - | GdomeDOMExceptionT.INVALID_MODIFICATION_ERR -> "INVALID_MODIFICATION_ERR" - | GdomeDOMExceptionT.NAMESPACE_ERR -> "NAMESPACE_ERR" - | GdomeDOMExceptionT.INVALID_ACCESS_ERR -> "INVALID_ACCESS_ERR") - | e -> - prerr_endline "PROBLEMA" ; - raise e + (GdomeInit.DOMException (e,msg) as ex) -> raise ex + | e -> raise e ;;