+++ /dev/null
-
-
-(* [make_iso enc] builds the test input for the 8-bit encoding [enc]: the
- * concatenation of [Netconversion.makechar enc i] for i = 0 .. 255, in
- * ascending order. Codes for which makechar raises [Not_found] contribute
- * nothing to the result.
- *)
-let make_iso enc =
-  let netenc = (enc :> Netconversion.encoding) in
-  (* Accumulate in a Buffer instead of re-copying the whole string with ^
-   * on every iteration. *)
-  let acc = Buffer.create 256 in
-  for code = 0 to 255 do
-    let piece =
-      try Netconversion.makechar netenc code
-      with Not_found -> "" in
-    Buffer.add_string acc piece
-  done;
-  Buffer.contents acc
-;;
-
-(* [make_ucs2 start stop] returns the code points [start] .. [stop - 1] as
- * consecutive 16-bit units, two bytes per code point, high byte first.
- * The result has length [(stop - start) * 2].
- * Raises [Invalid_argument] if a code point does not fit into 16 bits
- * (from [Char.chr]) or if the computed length is negative.
- *)
-let make_ucs2 start stop =
-  let count = stop - start in
-  (* Fill a Bytes value and convert at the end; strings are immutable. *)
-  let buf = Bytes.create (count * 2) in
-  for i = 0 to count - 1 do
-    let c = start + i in
-    Bytes.set buf (2 * i) (Char.chr (c lsr 8));
-    Bytes.set buf (2 * i + 1) (Char.chr (c land 0xff))
-  done;
-  Bytes.to_string buf
-;;
-
-(* [make_ucs4 start stop] returns the code points [start] .. [stop - 1] as
- * consecutive 32-bit units, four bytes per code point, most significant
- * byte first. The result has length [(stop - start) * 4].
- * Raises [Invalid_argument] if [c lsr 24] exceeds 255 for some code point
- * (from [Char.chr]) or if the computed length is negative.
- *)
-let make_ucs4 start stop =
-  let count = stop - start in
-  (* Fill a Bytes value and convert at the end; strings are immutable. *)
-  let buf = Bytes.create (count * 4) in
-  for i = 0 to count - 1 do
-    let c = start + i in
-    let pos = 4 * i in
-    Bytes.set buf pos (Char.chr (c lsr 24));
-    Bytes.set buf (pos + 1) (Char.chr ((c lsr 16) land 0xff));
-    Bytes.set buf (pos + 2) (Char.chr ((c lsr 8) land 0xff));
-    Bytes.set buf (pos + 3) (Char.chr (c land 0xff))
-  done;
-  Bytes.to_string buf
-;;
-
-(* Maps an encoding tag to the character set name that is placed after the
- * -f and -t options of the iconv command line.
- *)
-let name_of_encoding = function
-  (* Unicode encodings.
-   * Note: GNU-iconv assumes big endian byte order. *)
-  | `Enc_utf8 -> "UTF-8"
-  | `Enc_utf16 -> "UTF-16"
-  | `Enc_ucs2 -> "UCS-2"
-  | `Enc_ucs4 -> "UCS-4"
-  (* ISO 8859 family *)
-  | `Enc_iso88591 -> "ISO_8859-1"
-  | `Enc_iso88592 -> "ISO_8859-2"
-  | `Enc_iso88593 -> "ISO_8859-3"
-  | `Enc_iso88594 -> "ISO_8859-4"
-  | `Enc_iso88595 -> "ISO_8859-5"
-  | `Enc_iso88596 -> "ISO_8859-6"
-  | `Enc_iso88597 -> "ISO_8859-7"
-  | `Enc_iso88598 -> "ISO_8859-8"
-  | `Enc_iso88599 -> "ISO_8859-9"
-  | `Enc_iso885910 -> "ISO_8859-10"
-  | `Enc_iso885913 -> "ISO_8859-13"
-  | `Enc_iso885914 -> "ISO_8859-14"
-  | `Enc_iso885915 -> "ISO_8859-15"
-;;
-
-(* [iconv_recode_string in_enc out_enc in_s] converts [in_s] from [in_enc]
- * to [out_enc] by piping it through the external command
- * "iconv -f <in> -t <out>" and returns everything the command writes to
- * its standard output. The exit status of the command is ignored, so a
- * failing or missing iconv shows up as missing output, not as an exception.
- *)
-let iconv_recode_string in_enc out_enc in_s =
-  let command =
-    "iconv -f " ^ name_of_encoding in_enc ^ " -t " ^ name_of_encoding out_enc in
-  let out_ch, in_ch = Unix.open_process command in
-  (* Feed the command from a separate thread, so that writing the input
-   * runs concurrently with the reading loop below. *)
-  let writer =
-    Thread.create
-      (fun () ->
-         output_string in_ch in_s;
-         close_out in_ch)
-      () in
-  (* Collect the output in the current thread until end of file. A Buffer
-   * avoids re-copying the whole result for every chunk. *)
-  let result = Buffer.create (String.length in_s) in
-  let chunk = Bytes.create 1024 in
-  let rec drain () =
-    let n = input out_ch chunk 0 (Bytes.length chunk) in
-    if n > 0 then begin
-      Buffer.add_subbytes result chunk 0 n;
-      drain ()
-    end in
-  drain ();
-  (* Wait for the writer before the channels are closed underneath it. *)
-  Thread.join writer;
-  ignore (Unix.close_process (out_ch, in_ch));
-  Buffer.contents result
-;;
-
-(* Round-trip check for one 8-bit encoding [enc]: the string from
- * [make_iso enc] is converted to UTF-8 and back, once with Netconversion
- * and once with iconv. Both UTF-8 results must agree, and both results of
- * the way back must agree with each other and with the starting string.
- * Prints a progress line, then "OK"; a mismatch fails an assertion.
- *)
-let test_iso_and_utf8 enc =
-  let netenc = (enc :> Netconversion.encoding) in
-  print_string ("Recode: " ^ name_of_encoding enc ^ " and UTF-8... ");
-  flush stdout;
-  let original = make_iso enc in
-  (* Forward direction: enc -> UTF-8 *)
-  let utf8_net = Netconversion.recode_string netenc `Enc_utf8 original in
-  let utf8_iconv = iconv_recode_string enc `Enc_utf8 original in
-  assert (utf8_net = utf8_iconv);
-  (* Backward direction: UTF-8 -> enc *)
-  let back_net = Netconversion.recode_string `Enc_utf8 netenc utf8_net in
-  let back_iconv = iconv_recode_string `Enc_utf8 enc utf8_net in
-  assert (back_net = back_iconv && back_net = original);
-  print_endline "OK";
-  flush stdout
-;;
-
-(* Round-trip check for the code points #0000-#D7FF: the 16-bit string from
- * [make_ucs2 0 0xd800] is converted to UTF-8 and back, once with
- * Netconversion (as `Enc_utf16_be) and once with iconv (as `Enc_utf16).
- * Both implementations must agree in each direction, and the way back must
- * reproduce the starting string.
- *)
-let test_utf16_and_utf8_0000_d7ff () =
-  print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... ";
-  flush stdout;
-  let utf16 = make_ucs2 0 0xd800 in
-  (* UTF-16 -> UTF-8 *)
-  let utf8_net = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
-  let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
-  assert (utf8_net = utf8_iconv);
-  (* UTF-8 -> UTF-16 *)
-  let back_net =
-    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_net in
-  let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_net in
-  assert (back_net = back_iconv && back_net = utf16);
-  print_endline "OK";
-  flush stdout
-;;
-
-(* Round-trip check for the code points #E000-#FFFD: the 16-bit string from
- * [make_ucs2 0xe000 0xfffe] is converted to UTF-8 and back, once with
- * Netconversion (as `Enc_utf16_be) and once with iconv (as `Enc_utf16).
- * Both implementations must agree in each direction, and the way back must
- * reproduce the starting string.
- *)
-let test_utf16_and_utf8_e000_fffd () =
-  print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... ";
-  flush stdout;
-  let utf16 = make_ucs2 0xe000 0xfffe in
-  (* UTF-16 -> UTF-8 *)
-  let utf8_net = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
-  let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
-  assert (utf8_net = utf8_iconv);
-  (* UTF-8 -> UTF-16 *)
-  let back_net =
-    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_net in
-  let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_net in
-  assert (back_net = back_iconv && back_net = utf16);
-  print_endline "OK";
-  flush stdout
-;;
-
-(* Round-trip check for the code points #10000-#10FFFF, handled in 16
- * slices of 0x10000 code points each. For every slice the UTF-16 input is
- * itself produced by iconv from the UCS-4 string of [make_ucs4]; it is then
- * converted to UTF-8 and back with both Netconversion and iconv, and the
- * results are compared. Prints "+" after each slice and "OK" at the end.
- * The call at the end of this file is commented out.
- *)
-let test_utf16_and_utf8_10000_10FFFF () =
-  print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... ";
-  flush stdout;
-  for plane = 1 to 16 do
-    let first = plane * 0x10000 in
-    let ucs4 = make_ucs4 first (first + 0x10000) in
-    let utf16 = iconv_recode_string `Enc_ucs4 `Enc_utf16 ucs4 in
-    (* UTF-16 -> UTF-8 *)
-    let utf8_net =
-      Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
-    let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
-    assert (utf8_net = utf8_iconv);
-    (* UTF-8 -> UTF-16 *)
-    let back_net =
-      Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_net in
-    let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_net in
-    assert (back_net = back_iconv && back_net = utf16);
-    print_string "+";
-    flush stdout
-  done;
-  print_endline "OK";
-  flush stdout
-;;
-
-
-(* Test driver: prints a notice about the external iconv command, then runs
- * the enabled ISO-8859 tests followed by the two 16-bit range tests.
- *)
-let () =
-  print_endline "Warning: You need the command 'iconv' to run this test!";
-  flush stdout;
-  (* Currently disabled here: `Enc_iso88598, `Enc_iso885913,
-   * `Enc_iso885914, `Enc_iso885915 *)
-  List.iter test_iso_and_utf8
-    [ `Enc_iso88591;
-      `Enc_iso88592;
-      `Enc_iso88593;
-      `Enc_iso88594;
-      `Enc_iso88595;
-      `Enc_iso88596;
-      `Enc_iso88597;
-      `Enc_iso88599;
-      `Enc_iso885910 ];
-  test_utf16_and_utf8_0000_d7ff ();
-  test_utf16_and_utf8_e000_fffd ();
-  (* This test does not work because iconv does not support the surrogate
-   * representation of UTF-16:
-   * test_utf16_and_utf8_10000_10FFFF();
-   *)
-  ()
-;;