]> matita.cs.unibo.it Git - helm.git/blobdiff - helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml
Initial revision
[helm.git] / helm / DEVEL / pxp / pxp / rtests / canonxml / test_canonxml.ml
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml b/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml
new file mode 100644 (file)
index 0000000..ef83a28
--- /dev/null
@@ -0,0 +1,239 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let rec prerr_error e =
+  prerr_endline (string_of_exn e)
+;;
+
+class warner =
+  object 
+    method warn w =
+      prerr_endline ("WARNING: " ^ w)
+  end
+;;
+
+let outbuf = String.create 8192;;
+
+let output_utf8 config s =
+  match config.encoding  with
+      `Enc_utf8 ->
+       print_string s
+    | `Enc_iso88591 ->
+       for i = 0 to String.length s - 1 do
+         let c = Char.code(s.[i]) in
+         if c <= 127 then
+           print_char(Char.chr(c))
+         else begin
+           print_char(Char.chr(0xc0 lor (c lsr 6)));
+           print_char(Char.chr(0x80 lor (c land 0x3f)));
+         end
+       done
+    | _ -> assert false
+;;
+
+
+let re = Str.regexp "[&<>\"\009\010\013]";;
+
+let escaped s =
+  Str.global_substitute 
+    re
+    (fun _ ->
+       match Str.matched_string s with
+          "&"    -> "&amp;"
+        | "<"    -> "&lt;"
+        | ">"    -> "&gt;"
+        | "\""   -> "&quot;"
+        | "\009" -> "&#9;"
+        | "\010" -> "&#10;"
+        | "\013" -> "&#13;"
+        | _      -> assert false
+    )
+    s
+;;
+
+
+let rec output_xml config n =
+  match n # node_type with
+      T_super_root ->
+       n # iter_nodes (output_xml config)
+    | T_pinstr pi_name ->
+       let [ pi ] = n # pinstr pi_name in
+       output_utf8 config "<?";
+       output_utf8 config (pi # target);
+       output_utf8 config " ";
+       output_utf8 config (pi # value);
+       output_utf8 config "?>";
+    | T_element name ->
+       output_utf8 config "<";
+       output_utf8 config name;
+       let sorted_attnames = 
+         Sort.list ( <= ) (n # attribute_names) in
+       List.iter
+         (fun attname ->
+            match n # attribute attname with
+                Value v ->
+                  output_utf8 config " ";
+                  output_utf8 config attname;
+                  output_utf8 config "=\"";
+                  output_utf8 config (escaped v);
+                  output_utf8 config "\"";
+              | Valuelist vl ->
+                  let v = String.concat " " vl in
+                  output_utf8 config " ";
+                  output_utf8 config attname;
+                  output_utf8 config "=\"";
+                  output_utf8 config (escaped v);
+                  output_utf8 config "\"";
+              | Implied_value -> 
+                  ()
+         )
+         sorted_attnames;
+       output_utf8 config ">";
+       n # iter_nodes (output_xml config);
+       output_utf8 config "</";
+       output_utf8 config name;
+       output_utf8 config ">";
+    | T_data ->
+       let v = n # data in
+       output_utf8 config (escaped v)
+    | T_comment ->
+       let v =
+         match n # comment with
+             None -> assert false
+           | Some x -> x
+       in
+       output_utf8 config ("<!--" ^ v ^ "-->")
+    | _ -> 
+       assert false
+;;
+
+
+let parse debug wf iso88591 comments filename =
+  let spec =
+    let e = new element_impl default_extension in
+    e # keep_always_whitespace_mode;
+    make_spec_from_mapping
+      ~super_root_exemplar:      e
+      ~default_pinstr_exemplar:  e
+      ~comment_exemplar:         e
+      ~data_exemplar:            (new data_impl default_extension)
+      ~default_element_exemplar: e
+      ~element_mapping:          (Hashtbl.create 1)
+      ()
+  in
+  let config =
+      { default_config with 
+         warner = new warner;
+         debugging_mode = debug;
+         enable_pinstr_nodes = true;
+         enable_super_root_node = true;
+         enable_comment_nodes = comments;
+         encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+         idref_pass = true;
+      }
+  in
+  try 
+    let parse_fn =
+      if wf then parse_wfdocument_entity 
+      else 
+       let index = new hash_index in
+       parse_document_entity 
+         ?transform_dtd:None 
+         ~id_index:(index :> 'ext index)
+    in
+    let tree =
+      parse_fn
+        config
+       (from_file filename)
+       spec 
+    in
+    output_xml config (tree # root)
+  with
+      e ->
+       error_happened := true;
+       prerr_error e
+;;
+
+
+let main() =
+  let debug = ref false in
+  let wf = ref false in
+  let iso88591 = ref false in
+  let comments = ref false in
+  let files = ref [] in
+  Arg.parse
+      [ "-d",   Arg.Set debug, 
+          "          turn debugging mode on";
+       "-wf",  Arg.Set wf,    
+            "         check only on well-formedness";
+       "-iso-8859-1", Arg.Set iso88591, 
+                    " use ISO-8859-1 as internal encoding instead of UTF-8";
+       "-comments", Arg.Set comments, 
+                 "   output comments, too";
+      ]
+      (fun x -> files := x :: !files)
+      "
+usage: test_canonxml [options] file ...
+
+List of options:";
+  files := List.rev !files;
+  List.iter (parse !debug !wf !iso88591 !comments) !files;
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/17 00:51:57  gerd
+ *     Added -comments option to test enable_comment_nodes.
+ *
+ * Revision 1.7  2000/08/16 23:44:17  gerd
+ *     Updates because of changes of the PXP API.
+ *
+ * Revision 1.6  2000/07/14 14:56:55  gerd
+ *     Updated: warner.
+ *
+ * Revision 1.5  2000/07/14 14:17:58  gerd
+ *     Updated because of iterface changes.
+ *
+ * Revision 1.4  2000/07/09 01:06:20  gerd
+ *     Updated.
+ *
+ * Revision 1.3  2000/06/04 20:31:03  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.2  2000/05/20 20:34:28  gerd
+ *     Changed for UTF-8 support.
+ *
+ * Revision 1.1  2000/04/30 20:13:01  gerd
+ *     Initial revision.
+ *
+ * Revision 1.3  1999/11/09 22:27:30  gerd
+ *     The programs returns now an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2  1999/09/01 23:09:56  gerd
+ *     Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1  1999/08/14 22:20:53  gerd
+ *     Initial revision.
+ *
+ * 
+ *)