(* test_recode.ml
 * (from helm/DEVEL/pxp/netstring/tests/test_recode.ml)
 *
 * Cross-checks Netconversion.recode_string against the external
 * command iconv: the same input is recoded by both, and the results
 * are required to be equal in both directions.
 *)


(* [make_iso enc] concatenates the strings returned by
 * [Netconversion.makechar enc i] for i = 0 .. 255, in that order.
 * Codes for which makechar raises Not_found contribute nothing.
 *)
let make_iso enc =
  let buf = Buffer.create 512 in
  for i = 0 to 255 do
    let u =
      try Netconversion.makechar (enc :> Netconversion.encoding) i
      with Not_found -> ""
    in
    Buffer.add_string buf u
  done;
  Buffer.contents buf
;;

(* [make_ucs2 start stop] returns the codes start .. stop-1, each written
 * as two bytes, high byte first. An empty range yields "".
 * Char.chr raises Invalid_argument for a code that does not fit into
 * two bytes.
 *)
let make_ucs2 start stop =
  let buf = Buffer.create (2 * max 1 (stop - start)) in
  for c = start to stop - 1 do
    Buffer.add_char buf (Char.chr (c lsr 8));
    Buffer.add_char buf (Char.chr (c land 0xff))
  done;
  Buffer.contents buf
;;

(* [make_ucs4 start stop] returns the codes start .. stop-1, each written
 * as four bytes, most significant byte first. An empty range yields "".
 *)
let make_ucs4 start stop =
  let buf = Buffer.create (4 * max 1 (stop - start)) in
  for c = start to stop - 1 do
    Buffer.add_char buf (Char.chr (c lsr 24));
    Buffer.add_char buf (Char.chr ((c lsr 16) land 0xff));
    Buffer.add_char buf (Char.chr ((c lsr 8) land 0xff));
    Buffer.add_char buf (Char.chr (c land 0xff))
  done;
  Buffer.contents buf
;;

(* Maps an encoding tag to the name that is passed to the -f and -t
 * options of iconv, and that is printed in the progress messages.
 *)
let name_of_encoding enc =
  match enc with
    `Enc_iso88591  -> "ISO_8859-1"
  | `Enc_iso88592  -> "ISO_8859-2"
  | `Enc_iso88593  -> "ISO_8859-3"
  | `Enc_iso88594  -> "ISO_8859-4"
  | `Enc_iso88595  -> "ISO_8859-5"
  | `Enc_iso88596  -> "ISO_8859-6"
  | `Enc_iso88597  -> "ISO_8859-7"
  | `Enc_iso88598  -> "ISO_8859-8"
  | `Enc_iso88599  -> "ISO_8859-9"
  | `Enc_iso885910 -> "ISO_8859-10"
  | `Enc_iso885913 -> "ISO_8859-13"
  | `Enc_iso885914 -> "ISO_8859-14"
  | `Enc_iso885915 -> "ISO_8859-15"
  | `Enc_utf8      -> "UTF-8"
  | `Enc_ucs4      -> "UCS-4"
  | `Enc_ucs2      -> "UCS-2"
  | `Enc_utf16     -> "UTF-16"

  (* Note: GNU-iconv assumes big endian byte order *)
;;

(* [iconv_recode_string in_enc out_enc in_s] pipes [in_s] through
 * "iconv -f <in_enc> -t <out_enc>" and returns everything the command
 * writes to its standard output.
 *
 * Raises Failure if the command does not terminate with exit code 0;
 * a result produced by a failed iconv run is not a usable reference.
 *)
let iconv_recode_string in_enc out_enc in_s =
  let in_enc_name = name_of_encoding in_enc in
  let out_enc_name = name_of_encoding out_enc in
  let command = "iconv -f " ^ in_enc_name ^ " -t " ^ out_enc_name in

  (* out_ch reads the output of the command, in_ch feeds its input *)
  let out_ch, in_ch = Unix.open_process command in
  (* Write in_s to in_ch in a new thread, so that writing and reading
   * proceed concurrently (presumably to avoid blocking on a full pipe):
   *)
  let writer =
    Thread.create
      (fun () ->
         output_string in_ch in_s;
         close_out in_ch
      )
      ()
  in
  (* Read the result in the current thread, up to end of file. The
   * channel is buffered, so reading character by character is cheap,
   * and collecting into a Buffer keeps the total work linear.
   *)
  let result = Buffer.create (String.length in_s) in
  (try
     while true do
       Buffer.add_char result (input_char out_ch)
     done
   with End_of_file -> ());
  (* Let the writer finish with in_ch before close_process closes it *)
  Thread.join writer;
  (match Unix.close_process (out_ch, in_ch) with
     Unix.WEXITED 0 -> ()
   | Unix.WEXITED _
   | Unix.WSIGNALED _
   | Unix.WSTOPPED _ ->
       failwith ("iconv_recode_string: command failed: " ^ command));
  Buffer.contents result
;;

(* Recodes the string [make_iso enc] from [enc] to UTF-8 and back again,
 * once with Netconversion and once with iconv. Both must agree in each
 * direction, and the way back must reproduce the original string.
 *)
let test_iso_and_utf8 enc =
  let name = name_of_encoding enc in
  print_string ("Recode: " ^ name ^ " and UTF-8... "); flush stdout;
  let s = make_iso enc in
  let s1' =
    Netconversion.recode_string (enc :> Netconversion.encoding) `Enc_utf8 s in
  let s2' = iconv_recode_string enc `Enc_utf8 s in
  assert(s1' = s2');
  let s1 =
    Netconversion.recode_string `Enc_utf8 (enc :> Netconversion.encoding) s1' in
  let s2 = iconv_recode_string `Enc_utf8 enc s1' in
  assert(s1 = s2 && s1 = s);
  print_endline "OK"; flush stdout
;;

(* Private helper shared by the UTF-16 tests below: recodes [s] to UTF-8
 * and back, with Netconversion (`Enc_utf16_be) and with iconv
 * (`Enc_utf16). Both must agree in each direction, and the way back
 * must reproduce [s].
 *)
let check_utf16_be_roundtrip s =
  let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
  let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in
  assert(s1' = s2');
  let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
  let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in
  assert(s1 = s2 && s1 = s)
;;

(* Round trip for the codes #0000 .. #D7FF *)
let test_utf16_and_utf8_0000_d7ff () =
  print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... ";
  flush stdout;
  check_utf16_be_roundtrip (make_ucs2 0 0xd800);
  print_endline "OK"; flush stdout
;;

(* Round trip for the codes #E000 .. #FFFD *)
let test_utf16_and_utf8_e000_fffd () =
  print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... ";
  flush stdout;
  check_utf16_be_roundtrip (make_ucs2 0xe000 0xfffe);
  print_endline "OK"; flush stdout
;;

(* Round trip for the codes #10000 .. #10FFFF, in 16 slices of #10000
 * codes each. The UTF-16 input of every slice is obtained by letting
 * iconv recode the UCS-4 string. A "+" is printed after every slice.
 *)
let test_utf16_and_utf8_10000_10FFFF () =
  print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... ";
  flush stdout;
  for i = 1 to 16 do
    let s0 = make_ucs4 (i * 0x10000) (i * 0x10000 + 0x10000) in
    let s = iconv_recode_string `Enc_ucs4 `Enc_utf16 s0 in
    check_utf16_be_roundtrip s;
    print_string "+"; flush stdout
  done;
  print_endline "OK"; flush stdout
;;


let () =
  print_endline "Warning: You need the command 'iconv' to run this test!";
  flush stdout;
  test_iso_and_utf8 `Enc_iso88591;
  test_iso_and_utf8 `Enc_iso88592;
  test_iso_and_utf8 `Enc_iso88593;
  test_iso_and_utf8 `Enc_iso88594;
  test_iso_and_utf8 `Enc_iso88595;
  test_iso_and_utf8 `Enc_iso88596;
  test_iso_and_utf8 `Enc_iso88597;
  (* test_iso_and_utf8 `Enc_iso88598; *)
  test_iso_and_utf8 `Enc_iso88599;
  test_iso_and_utf8 `Enc_iso885910;
  (* test_iso_and_utf8 `Enc_iso885913; *)
  (* test_iso_and_utf8 `Enc_iso885914; *)
  (* test_iso_and_utf8 `Enc_iso885915; *)
  test_utf16_and_utf8_0000_d7ff();
  test_utf16_and_utf8_e000_fffd();
  (* This test does not work because iconv does not support the surrogate
   * representation of UTF-16:
   * test_utf16_and_utf8_10000_10FFFF();
   *)
  ()
;;