X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fpxp%2Frtests%2Fcanonxml%2Ftest_canonxml.ml;fp=helm%2FDEVEL%2Fpxp%2Fpxp%2Frtests%2Fcanonxml%2Ftest_canonxml.ml;h=ef83a28fc919ed21358c8a4ca965a039b3f30032;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml b/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml new file mode 100644 index 000000000..ef83a28fc --- /dev/null +++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml @@ -0,0 +1,239 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +open Pxp_document;; +open Pxp_yacc;; +open Pxp_types;; + +let error_happened = ref false;; + +let rec prerr_error e = + prerr_endline (string_of_exn e) +;; + +class warner = + object + method warn w = + prerr_endline ("WARNING: " ^ w) + end +;; + +let outbuf = String.create 8192;; + +let output_utf8 config s = + match config.encoding with + `Enc_utf8 -> + print_string s + | `Enc_iso88591 -> + for i = 0 to String.length s - 1 do + let c = Char.code(s.[i]) in + if c <= 127 then + print_char(Char.chr(c)) + else begin + print_char(Char.chr(0xc0 lor (c lsr 6))); + print_char(Char.chr(0x80 lor (c land 0x3f))); + end + done + | _ -> assert false +;; + + +let re = Str.regexp "[&<>\"\009\010\013]";; + +let escaped s = + Str.global_substitute + re + (fun _ -> + match Str.matched_string s with + "&" -> "&" + | "<" -> "<" + | ">" -> ">" + | "\"" -> """ + | "\009" -> " " + | "\010" -> " " + | "\013" -> " " + | _ -> assert false + ) + s +;; + + +let rec output_xml config n = + match n # node_type with + T_super_root -> + n # iter_nodes (output_xml config) + | T_pinstr pi_name -> + let [ pi ] = n # pinstr pi_name in + output_utf8 config ""; + | T_element name -> + output_utf8 config "<"; + output_utf8 config name; + let sorted_attnames = + Sort.list ( <= ) (n # attribute_names) in + List.iter + (fun attname -> + match n # attribute attname with + Value v -> + output_utf8 config " "; + output_utf8 config attname; + output_utf8 config "=\""; + output_utf8 config (escaped v); + output_utf8 config "\""; + | Valuelist vl -> + let v = String.concat " " vl in + output_utf8 config " "; + output_utf8 config attname; + output_utf8 config "=\""; + output_utf8 config (escaped v); + output_utf8 config "\""; + | Implied_value -> + () + ) + sorted_attnames; + output_utf8 config ">"; + n # iter_nodes (output_xml config); + output_utf8 config ""; + | T_data -> + let v = n # data in + output_utf8 config (escaped v) + | T_comment -> + let v = + match n # comment with + None -> assert false + | Some x -> x + in + output_utf8 config ("") + | _ -> + assert false +;; + + +let parse debug wf iso88591 comments filename = + let spec = + let e = new element_impl default_extension in + e # keep_always_whitespace_mode; + make_spec_from_mapping + ~super_root_exemplar: e + ~default_pinstr_exemplar: e + ~comment_exemplar: e + ~data_exemplar: (new data_impl default_extension) + ~default_element_exemplar: e + ~element_mapping: (Hashtbl.create 1) + () + in + let config = + { default_config with + warner = new warner; + debugging_mode = debug; + enable_pinstr_nodes = true; + enable_super_root_node = true; + enable_comment_nodes = comments; + encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8; + idref_pass = true; + } + in + try + let parse_fn = + if wf then parse_wfdocument_entity + else + let index = new hash_index in + parse_document_entity + ?transform_dtd:None + ~id_index:(index :> 'ext index) + in + let tree = + parse_fn + config + (from_file filename) + spec + in + output_xml config (tree # root) + with + e -> + error_happened := true; + prerr_error e +;; + + +let main() = + let debug = ref false in + let wf = ref false in + let iso88591 = ref false in + let comments = ref false in + let files = ref [] in + Arg.parse + [ "-d", Arg.Set debug, + " turn debugging mode on"; + "-wf", Arg.Set wf, + " check only on well-formedness"; + "-iso-8859-1", Arg.Set iso88591, + " use ISO-8859-1 as internal encoding instead of UTF-8"; + "-comments", Arg.Set comments, + " output comments, too"; + ] + (fun x -> files := x :: !files) + " +usage: test_canonxml [options] file ... + +List of options:"; + files := List.rev !files; + List.iter (parse !debug !wf !iso88591 !comments) !files; +;; + + +main(); +if !error_happened then exit(1);; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:32 lpadovan + * Initial revision + * + * Revision 1.8 2000/08/17 00:51:57 gerd + * Added -comments option to test enable_comment_nodes. + * + * Revision 1.7 2000/08/16 23:44:17 gerd + * Updates because of changes of the PXP API. + * + * Revision 1.6 2000/07/14 14:56:55 gerd + * Updated: warner. + * + * Revision 1.5 2000/07/14 14:17:58 gerd + * Updated because of iterface changes. + * + * Revision 1.4 2000/07/09 01:06:20 gerd + * Updated. + * + * Revision 1.3 2000/06/04 20:31:03 gerd + * Updates because of renamed PXP modules. + * + * Revision 1.2 2000/05/20 20:34:28 gerd + * Changed for UTF-8 support. + * + * Revision 1.1 2000/04/30 20:13:01 gerd + * Initial revision. + * + * Revision 1.3 1999/11/09 22:27:30 gerd + * The programs returns now an exit code of 1 if one of the + * XML files produces an error. + * + * Revision 1.2 1999/09/01 23:09:56 gerd + * Added the option -wf that switches to well-formedness checking + * instead of validation. + * + * Revision 1.1 1999/08/14 22:20:53 gerd + * Initial revision. + * + * + *)