Initial revision

[helm.git] / helm / DEVEL / pxp / netstring / netconversion.ml
diff --git a/helm/DEVEL/pxp/netstring/netconversion.ml b/helm/DEVEL/pxp/netstring/netconversion.ml

new file mode 100644 (file)

index 0000000..e740654
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netconversion.ml
@@ -0,0 +1,864 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code
+
+
+type encoding =
+  [  `Enc_utf8       (* UTF-8 *)
+  |  `Enc_java
+  |  `Enc_utf16      (* UTF-16 with unspecified endianess (restricted usage) *)
+  |  `Enc_utf16_le   (* UTF-16 little endian *)
+  |  `Enc_utf16_be   (* UTF-16 big endian *)
+  |  `Enc_usascii    (* US-ASCII (only 7 bit) *)
+  |  `Enc_iso88591   (* ISO-8859-1 *)
+  |  `Enc_iso88592   (* ISO-8859-2 *)
+  |  `Enc_iso88593   (* ISO-8859-3 *)
+  |  `Enc_iso88594   (* ISO-8859-4 *)
+  |  `Enc_iso88595   (* ISO-8859-5 *)
+  |  `Enc_iso88596   (* ISO-8859-6 *)
+  |  `Enc_iso88597   (* ISO-8859-7 *)
+  |  `Enc_iso88598   (* ISO-8859-8 *)
+  |  `Enc_iso88599   (* ISO-8859-9 *)
+  |  `Enc_iso885910  (* ISO-8859-10 *)
+  |  `Enc_iso885913  (* ISO-8859-13 *)
+  |  `Enc_iso885914  (* ISO-8859-14 *)
+  |  `Enc_iso885915  (* ISO-8859-15 *)
+  |  `Enc_koi8r      (* KOI8-R *)
+  |  `Enc_jis0201    (* JIS-0201 *)
+    (* Microsoft: *)
+  |  `Enc_windows1250  (* WINDOWS-1250 *)
+  |  `Enc_windows1251  (* WINDOWS-1251 *)
+  |  `Enc_windows1252  (* WINDOWS-1252 *)
+  |  `Enc_windows1253  (* WINDOWS-1253 *)
+  |  `Enc_windows1254  (* WINDOWS-1254 *)
+  |  `Enc_windows1255  (* WINDOWS-1255 *)
+  |  `Enc_windows1256  (* WINDOWS-1256 *)
+  |  `Enc_windows1257  (* WINDOWS-1257 *)
+  |  `Enc_windows1258  (* WINDOWS-1258 *)
+    (* IBM, ASCII-based: *)
+  |  `Enc_cp437
+  |  `Enc_cp737
+  |  `Enc_cp775
+  |  `Enc_cp850
+  |  `Enc_cp852
+  |  `Enc_cp855
+  |  `Enc_cp856
+  |  `Enc_cp857
+  |  `Enc_cp860
+  |  `Enc_cp861
+  |  `Enc_cp862
+  |  `Enc_cp863
+  |  `Enc_cp864
+  |  `Enc_cp865
+  |  `Enc_cp866
+  |  `Enc_cp869
+  |  `Enc_cp874
+  |  `Enc_cp1006
+   (* IBM, EBCDIC-based: *)
+  |  `Enc_cp037
+  |  `Enc_cp424
+  |  `Enc_cp500
+  |  `Enc_cp875
+  |  `Enc_cp1026
+   (* Adobe: *)
+  |  `Enc_adobe_standard_encoding
+  |  `Enc_adobe_symbol_encoding
+  |  `Enc_adobe_zapf_dingbats_encoding
+   (* Apple: *)
+  |  `Enc_macroman
+
+  ]
+;;
+
+
+let norm_enc_name e =
+  (* Removes some characters from e; uppercase *)
+  let e' = String.create (String.length e) in
+  let rec next i j =
+    if i < String.length e then
+      match e.[i] with
+         ('-'|'_'|'.') -> next (i+1) j
+       | c             -> e'.[j] <- c; next (i+1) (j+1)
+    else
+      j
+  in
+  let l = next 0 0 in
+  String.uppercase(String.sub e' 0 l)
+;;
+
+
+let encoding_of_string e =
+  match norm_enc_name e with
+      ("UTF16"|"UCS2"|"ISO10646UCS2")                 -> `Enc_utf16
+    | "UTF16BE"                                       -> `Enc_utf16_be
+    | "UTF16LE"                                       -> `Enc_utf16_le
+    | "UTF8"                                          -> `Enc_utf8
+    | ("UTF8JAVA"|"JAVA")                             -> `Enc_java
+    | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii
+    | ("ISO88591"|"LATIN1"|"IBM819"|"CP819")          -> `Enc_iso88591
+    | ("ISO88592"|"LATIN2")                           -> `Enc_iso88592
+    | ("ISO88593"|"LATIN3")                           -> `Enc_iso88593
+    | ("ISO88594"|"LATIN4")                           -> `Enc_iso88594
+    | ("ISO88595"|"CYRILLIC")                         -> `Enc_iso88595
+    | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708")       -> `Enc_iso88596
+    | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597
+    | ("ISO88598"|"HEBREW")                           -> `Enc_iso88598
+    | ("ISO88599"|"LATIN5")                           -> `Enc_iso88599
+    | ("ISO885910"|"LATIN6")                          -> `Enc_iso885910
+    | "ISO885913"                                     -> `Enc_iso885913
+    | "ISO885914"                                     -> `Enc_iso885914
+    | "ISO885915"                                     -> `Enc_iso885915
+    | "KOI8R"                                         -> `Enc_koi8r
+    | ("JIS0201"|"JISX0201"|"X0201")                  -> `Enc_jis0201
+
+    | "WINDOWS1250"                                   -> `Enc_windows1250
+    | "WINDOWS1251"                                   -> `Enc_windows1251
+    | "WINDOWS1252"                                   -> `Enc_windows1252
+    | "WINDOWS1253"                                   -> `Enc_windows1253
+    | "WINDOWS1254"                                   -> `Enc_windows1254
+    | "WINDOWS1255"                                   -> `Enc_windows1255
+    | "WINDOWS1256"                                   -> `Enc_windows1256
+    | "WINDOWS1257"                                   -> `Enc_windows1257
+    | "WINDOWS1258"                                   -> `Enc_windows1258
+
+    | ("CP437"|"IBM437")                              -> `Enc_cp437
+    | ("CP737"|"IBM737")                              -> `Enc_cp737
+    | ("CP775"|"IBM775")                              -> `Enc_cp775
+    | ("CP850"|"IBM850")                              -> `Enc_cp850
+    | ("CP852"|"IBM852")                              -> `Enc_cp852
+    | ("CP855"|"IBM855")                              -> `Enc_cp855
+    | ("CP856"|"IBM856")                              -> `Enc_cp856
+    | ("CP857"|"IBM857")                              -> `Enc_cp857
+    | ("CP860"|"IBM860")                              -> `Enc_cp860
+    | ("CP861"|"IBM861")                              -> `Enc_cp861
+    | ("CP862"|"IBM862")                              -> `Enc_cp862
+    | ("CP863"|"IBM863")                              -> `Enc_cp863
+    | ("CP864"|"IBM864")                              -> `Enc_cp864
+    | ("CP865"|"IBM865")                              -> `Enc_cp865
+    | ("CP866"|"IBM866")                              -> `Enc_cp866
+    | ("CP869"|"IBM869")                              -> `Enc_cp869
+    | ("CP874"|"IBM874")                              -> `Enc_cp874
+    | ("CP1006"|"IBM1006")                            -> `Enc_cp1006
+
+    | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"|
+       "EBCDICCPNL")                                  -> `Enc_cp037
+    | ("CP424"|"IBM424"|"EBCDICCPHE")                 -> `Enc_cp424
+    | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH")    -> `Enc_cp500
+    | ("CP875"|"IBM875")                              -> `Enc_cp875
+    | ("CP1026"|"IBM1026")                            -> `Enc_cp1026
+
+    | "ADOBESTANDARDENCODING"       -> `Enc_adobe_standard_encoding
+    | "ADOBESYMBOLENCODING"         -> `Enc_adobe_symbol_encoding
+    | "ADOBEZAPFDINGBATSENCODING"   -> `Enc_adobe_zapf_dingbats_encoding
+
+    | "MACINTOSH"                   -> `Enc_macroman
+
+    | _ ->
+       failwith "Netconversion.encoding_of_string: unknown encoding"
+;;
+
+
+let string_of_encoding (e : encoding) =
+  (* If there is a "preferred MIME name", this name is returned (see IANA). *)
+  match e with
+      `Enc_utf16    -> "UTF-16"
+    | `Enc_utf16_be -> "UTF-16-BE"
+    | `Enc_utf16_le -> "UTF-16-LE"
+    | `Enc_utf8     -> "UTF-8"
+    | `Enc_java     -> "UTF-8-JAVA"
+    | `Enc_usascii  -> "US-ASCII"
+    | `Enc_iso88591 -> "ISO-8859-1"
+    | `Enc_iso88592 -> "ISO-8859-2"
+    | `Enc_iso88593 -> "ISO-8859-3"
+    | `Enc_iso88594 -> "ISO-8859-4"
+    | `Enc_iso88595 -> "ISO-8859-5"
+    | `Enc_iso88596 -> "ISO-8859-6"
+    | `Enc_iso88597 -> "ISO-8859-7"
+    | `Enc_iso88598 -> "ISO-8859-8"
+    | `Enc_iso88599 -> "ISO-8859-9"
+    | `Enc_iso885910 -> "ISO-8859-10"
+    | `Enc_iso885913 -> "ISO-8859-13"
+    | `Enc_iso885914 -> "ISO-8859-14"
+    | `Enc_iso885915 -> "ISO-8859-15"
+    | `Enc_koi8r     -> "KOI8-R"
+    | `Enc_jis0201   -> "JIS_X0201"
+    | `Enc_windows1250 -> "WINDOWS-1250"
+    | `Enc_windows1251 -> "WINDOWS-1251"
+    | `Enc_windows1252 -> "WINDOWS-1252"
+    | `Enc_windows1253 -> "WINDOWS-1253"
+    | `Enc_windows1254 -> "WINDOWS-1254"
+    | `Enc_windows1255 -> "WINDOWS-1255"
+    | `Enc_windows1256 -> "WINDOWS-1256"
+    | `Enc_windows1257 -> "WINDOWS-1257"
+    | `Enc_windows1258 -> "WINDOWS-1258"
+    | `Enc_cp437   -> "CP437"
+    | `Enc_cp737   -> "CP737"
+    | `Enc_cp775   -> "CP775"
+    | `Enc_cp850   -> "CP850"
+    | `Enc_cp852   -> "CP852"
+    | `Enc_cp855   -> "CP855"
+    | `Enc_cp856   -> "CP856"
+    | `Enc_cp857   -> "CP857"
+    | `Enc_cp860   -> "CP860"
+    | `Enc_cp861   -> "CP861"
+    | `Enc_cp862   -> "CP862"
+    | `Enc_cp863   -> "CP863"
+    | `Enc_cp864   -> "CP864"
+    | `Enc_cp865   -> "CP865"
+    | `Enc_cp866   -> "CP866"
+    | `Enc_cp869   -> "CP869"
+    | `Enc_cp874   -> "CP874"
+    | `Enc_cp1006  -> "CP1006"
+    | `Enc_cp037   -> "CP037"
+    | `Enc_cp424   -> "CP424"
+    | `Enc_cp500   -> "CP500"
+    | `Enc_cp875   -> "CP875"
+    | `Enc_cp1026  -> "CP1026"
+    | `Enc_adobe_standard_encoding      -> "ADOBE-STANDARD-ENCODING"
+    | `Enc_adobe_symbol_encoding        -> "ADOBE-SYMBOL-ENCODING"
+    | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING"
+    | `Enc_macroman                     -> "MACINTOSH"
+;;
+
+
+let read_iso88591 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p = Char.code s_in.[p_in + k_in] in
+      let n = write p k_out c_out in
+      if n < 0 then
+       k_in, k_out, `Enc_iso88591
+      else
+       scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_iso88591
+  in
+  scan 0 0 0
+;;
+
+
+let read_usascii write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p = Char.code s_in.[p_in + k_in] in
+      if p >= 0x80 then raise Malformed_code;
+      let n = write p k_out c_out in
+      if n < 0 then
+       k_in, k_out, `Enc_usascii
+      else
+       scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_usascii
+  in
+  scan 0 0 0
+;;
+
+
+let read_8bit m_to_unicode enc write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p_local = Char.code s_in.[p_in + k_in] in
+      let p_uni = Array.unsafe_get m_to_unicode p_local in
+      if p_uni < 0 then raise Malformed_code;
+      let n = write p_uni k_out c_out in
+      if n < 0 then
+       k_in, k_out, enc
+      else
+       scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, enc
+  in
+  scan 0 0 0
+;;
+
+
+let read_utf8 is_java write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let n_out, n_in =
+       match s_in.[p_in + k_in] with
+           '\000' ->
+             if is_java then raise Malformed_code;
+             write 0 k_out c_out, 1
+         | ('\001'..'\127' as c) ->
+             write (Char.code c) k_out c_out, 1
+         | ('\128'..'\223' as c) ->
+             if k_in + 1 >= l_in then
+               -1, 0
+             else begin
+               let n1 = Char.code c in
+               let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+               if is_java && (n1 = 0x80 && n2 = 0xc0) then
+                 write 0 k_out c_out, 2
+               else begin
+                 if n2 < 128 or n2 > 191 then raise Malformed_code;
+                 let p = ((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) in
+                 if p < 128 then raise Malformed_code;
+                 write p k_out c_out, 2
+               end
+             end
+         | ('\224'..'\239' as c) ->
+             if k_in + 2 >= l_in then
+               -1, 0
+             else begin
+               let n1 = Char.code c in
+               let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+               let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+               if n2 < 128 or n2 > 191 then raise Malformed_code;
+               if n3 < 128 or n3 > 191 then raise Malformed_code;
+               let p =
+                 ((n1 land 0b1111) lsl 12) lor
+                 ((n2 land 0b111111) lsl 6) lor
+                 (n3 land 0b111111)
+               in
+               if p < 0x800 then raise Malformed_code;
+               if (p >= 0xd800 && p < 0xe000) then
+                 (* Surrogate pairs are not supported in UTF-8 *)
+                 raise Malformed_code;
+               if (p >= 0xfffe && p <= 0xffff) then
+                 raise Malformed_code;
+               write p k_out c_out, 3
+             end
+         | ('\240'..'\247' as c) ->
+             if k_in + 3 >= l_in then
+               -1, 0
+             else begin
+               let n1 = Char.code c in
+               let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+               let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+               let n4 = Char.code (s_in.[p_in + k_in + 3]) in
+               if n2 < 128 or n2 > 191 then raise Malformed_code;
+               if n3 < 128 or n3 > 191 then raise Malformed_code;
+               if n4 < 128 or n4 > 191 then raise Malformed_code;
+               let p = ((n1 land 0b111) lsl 18) lor
+                       ((n2 land 0b111111) lsl 12) lor
+                       ((n3 land 0b111111) lsl 6) lor
+                       (n4 land 0b111111)
+               in
+               if p < 0x10000 then raise Malformed_code;
+               if p >= 0x110000 then
+                 (* These code points are not supported. *)
+                 raise Malformed_code;
+               write p k_out c_out, 4
+             end
+         | _ ->
+             (* Outside the valid range of XML characters *)
+             raise Malformed_code;
+      in
+      (* n_out: number of written bytes; -1 means out buf is full
+       * n_in: number of read bytes; 0 means end of in buf reached
+       * n_in = 0  implies  n_out = -1
+       *)
+      if n_out < 0 then
+       k_in, k_out, `Enc_utf8
+      else
+       scan (k_in + n_in) (k_out + n_out) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_utf8
+  in
+  scan 0 0 0
+;;
+
+
+let surrogate_offset = 0x10000 - (0xD800 lsl 10) - 0xDC00;;
+       
+let read_utf16_le k_in_0 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in + 1 < l_in then begin
+      let p = (Char.code s_in.[p_in + k_in]) lor ((Char.code s_in.[p_in + k_in + 1]) lsl 8) in
+
+      if p >= 0xd800 & p < 0xe000 then begin
+       (* This is a surrogate pair. *)
+       if k_in + 3 < l_in then begin
+         if p <= 0xdbff then begin
+           let q = (Char.code s_in.[p_in + k_in + 2 ]) lor
+                   ((Char.code s_in.[p_in + k_in + 3]) lsl 8) in
+           if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+           let eff_p = (p lsl 10) + q + surrogate_offset in
+           let n = write eff_p k_out c_out in
+           if n < 0 then
+             k_in, k_out, `Enc_utf16_le
+           else
+             scan (k_in + 4) (k_out + n) (c_out + 1)
+         end
+         else
+           (* Malformed pair: *)
+           raise Malformed_code;
+       end
+       else 
+         (* Incomplete pair: *)
+         k_in, k_out, `Enc_utf16_le
+      end
+
+      else
+       if p = 0xfffe then 
+         (* Big endian byte order mark: It is illegal here *)
+         raise Malformed_code
+       else begin
+         (* A regular code point *)
+         let n = write p k_out c_out in
+         if n < 0 then
+           k_in, k_out, `Enc_utf16_le
+         else
+           scan (k_in + 2) (k_out + n) (c_out + 1)
+       end
+    end
+    else
+      (* Incomplete character: *)
+      k_in, k_out, `Enc_utf16_le
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16_be k_in_0 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in + 1 < l_in then begin
+      let p = (Char.code s_in.[p_in + k_in + 1]) lor ((Char.code s_in.[p_in + k_in]) lsl 8) in
+
+      if p >= 0xd800 & p < 0xe000 then begin
+       (* This is a surrogate pair. *)
+       if k_in + 3 < l_in then begin
+         if p <= 0xdbff then begin
+           let q = (Char.code s_in.[p_in + k_in + 3 ]) lor
+                   ((Char.code s_in.[p_in + k_in + 2]) lsl 8) in
+           if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+           let eff_p = (p lsl 10) + q + surrogate_offset in
+           let n = write eff_p k_out c_out in
+           if n < 0 then
+             k_in, k_out, `Enc_utf16_be
+           else
+             scan (k_in + 4) (k_out + n) (c_out + 1)
+         end
+         else
+           (* Malformed pair: *)
+           raise Malformed_code;
+       end
+       else 
+         (* Incomplete pair: *)
+         k_in, k_out, `Enc_utf16_be
+      end
+
+      else
+       if p = 0xfffe then
+         (* Little endian byte order mark: It is illegal here *)
+         raise Malformed_code
+       else begin
+         (* A regular code point *)
+         let n = write p k_out c_out in
+         if n < 0 then
+           k_in, k_out, `Enc_utf16_be
+         else
+           scan (k_in + 2) (k_out + n) (c_out + 1)
+       end
+
+    end
+    else
+      (* Incomplete character: *)
+      k_in, k_out, `Enc_utf16_be
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16 write s_in p_in l_in =
+  (* Expect a BOM at the beginning of the text *)
+  if l_in >= 2 then begin
+    let c0 = s_in.[p_in + 0] in
+    let c1 = s_in.[p_in + 1] in
+    if c0 = '\254' & c1 = '\255' then begin
+      (* 0xfeff as big endian *)
+      read_utf16_be 2 write s_in p_in l_in
+    end
+    else 
+      if c0 = '\255' & c1 = '\254' then begin
+       (* 0xfeff as little endian *)
+       read_utf16_le 2 write s_in p_in l_in
+      end
+      else
+       (* byte order mark missing *)
+       raise Malformed_code
+  end
+  else
+    0, 0, `Enc_utf16
+;;
+
+
+let write_iso88591 s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    if p > 255 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+       (* Enough space to store 'subst': *)
+       String.blit subst 0 s_out (k_out+p_out) l_subst;
+       l_subst
+      end
+      else
+       (* Not enough space: Stop this round of recoding *)
+       -1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_usascii s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    if p > 127 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+       (* Enough space to store 'subst': *)
+       String.blit subst 0 s_out (k_out+p_out) l_subst;
+       l_subst
+      end
+      else
+       (* Not enough space: Stop this round of recoding *)
+       -1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    let p' =
+      match Array.unsafe_get from_unicode (p land 255) with
+         Netmappings.U_nil -> -1
+       | Netmappings.U_single (p0,q0) ->
+           if p0 = p then q0 else -1
+       | Netmappings.U_list l ->
+           (try List.assoc p l with Not_found -> -1)
+    in
+    if p' < 0 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+       (* Enough space to store 'subst': *)
+       String.blit subst 0 s_out (k_out+p_out) l_subst;
+       l_subst
+      end
+      else
+       (* Not enough space: Stop this round of recoding *)
+       -1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p';
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out =
+  if p <= 127 && (not is_java || p <> 0) then begin
+    if k_out < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+    else -1
+  end
+  else if p <= 0x7ff then begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xc0 lor (p lsr 6));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor (p land 0x3f));
+      2
+    end
+    else -1
+  end
+  else if p <= 0xffff then begin
+    (* Refuse writing surrogate pairs, and fffe, ffff *)
+    if (p >= 0xd800 & p < 0xe000) or (p >= 0xfffe) then
+      failwith "Netconversion.write_utf8";
+    if k_out + 2 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xe0 lor (p lsr 12));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 6) land 0x3f));
+      s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor (p land 0x3f));
+      3
+    end
+    else -1
+  end
+  else if p <= 0x10ffff then begin
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xf0 lor (p lsr 18));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 12) land 0x3f));
+      s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor ((p lsr 6)  land 0x3f));
+      s_out.[p_out + k_out + 3] <- Char.chr (0x80 lor (p land 0x3f));
+      4
+    end
+    else -1
+  end
+  else
+    (* Higher code points are not possible in XML: *)
+    failwith "Netconversion.write_utf8"
+;;
+
+
+let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_le";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = (p lsr 10) + 0xd800 in
+      let low  = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out    ] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out    ] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_be";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = (p lsr 10) + 0xd800 in
+      let low  = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out + 1] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out    ] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 3] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 2] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out + 1] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out    ] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let recode ~in_enc
+           ~in_buf
+          ~in_pos
+          ~in_len
+          ~out_enc
+          ~out_buf
+           ~out_pos
+          ~out_len
+          ~max_chars
+          ~subst =
+  if (in_pos < 0  || in_len < 0  || in_pos  + in_len  > String.length in_buf ||
+      out_pos < 0 || out_len < 0 || out_pos + out_len > String.length out_buf)
+  then
+    invalid_arg "Netconversion.recode";
+
+  let reader =
+    match in_enc with
+       `Enc_iso88591 -> read_iso88591
+      | `Enc_usascii  -> read_usascii
+      | `Enc_utf8     -> read_utf8 false
+      | `Enc_java     -> read_utf8 true
+      | `Enc_utf16    -> read_utf16
+      | `Enc_utf16_le -> read_utf16_le 0
+      | `Enc_utf16_be -> read_utf16_be 0
+      | _             -> 
+         (try
+            let to_unicode' = Hashtbl.find Netmappings.to_unicode in_enc in
+            let to_unicode =
+              Netmappings.lock();
+              Lazy.force to_unicode' in
+            Netmappings.unlock();
+            read_8bit to_unicode in_enc
+          with
+              Not_found ->
+                failwith("Support for the encoding `" ^
+                         string_of_encoding in_enc ^ 
+                         "' has not been compiled into Netstring")
+         )
+  in
+  let writer =
+    match out_enc with
+       `Enc_iso88591 -> write_iso88591  out_buf out_pos out_len max_chars subst
+      | `Enc_usascii  -> write_usascii   out_buf out_pos out_len max_chars subst
+      | `Enc_utf8     -> write_utf8 false 
+                                         out_buf out_pos out_len max_chars subst
+      | `Enc_java     -> write_utf8 true out_buf out_pos out_len max_chars subst
+      | `Enc_utf16    -> failwith "Netconversion.recode"
+      | `Enc_utf16_le -> write_utf16_le  out_buf out_pos out_len max_chars subst
+      | `Enc_utf16_be -> write_utf16_be  out_buf out_pos out_len max_chars subst
+      | _             -> 
+         (try
+            let from_unicode' = Hashtbl.find Netmappings.from_unicode out_enc 
+            in
+            let from_unicode =
+              Netmappings.lock();
+              Lazy.force from_unicode' in
+            Netmappings.unlock();
+            write_8bit from_unicode out_buf out_pos out_len max_chars subst
+          with
+              Not_found ->
+                failwith("Support for the encoding `" ^
+                         string_of_encoding out_enc ^ 
+                         "' has not been compiled into Netstring")
+         )
+  in
+  reader writer in_buf in_pos in_len
+;;
+
+
+let makechar enc p =
+  match enc with
+      `Enc_iso88591 -> 
+       if p > 255 then raise Not_found;
+       String.make 1 (Char.chr p)
+    | `Enc_usascii ->
+       if p > 127 then raise Not_found;
+       String.make 1 (Char.chr p)
+    | `Enc_utf8 ->
+       let s = String.create 4 in
+       let n = write_utf8 false s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_java ->
+       let s = String.create 4 in
+       let n = write_utf8 true s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_utf16_le ->
+       let s = String.create 4 in
+       let n = write_utf16_le s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_utf16_be ->
+       let s = String.create 4 in
+       let n = write_utf16_be s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_utf16 ->
+       failwith "Netconversion.makechar"
+    | _ ->
+       let s = String.create 1 in
+       let from_unicode' = 
+         try
+           Hashtbl.find Netmappings.from_unicode enc 
+         with
+             Not_found ->
+               failwith("Support for the encoding `" ^
+                        string_of_encoding enc ^ 
+                        "' has not been compiled into Netstring")
+       in
+       let from_unicode =
+         Netmappings.lock();
+         Lazy.force from_unicode' in
+       Netmappings.unlock();
+       let n =
+         write_8bit from_unicode s 0 1 1 (fun _ -> raise Not_found) p 0 0 in
+       s
+;;
+
+
+let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
+
+  let length = String.length s in
+  let size = 1024 in
+  let out_buf = String.create size in
+
+  let rec recode_loop k s_done in_enc =
+    (* 'k' bytes of 's' have already been processed, and the result is in
+     * 's_done'.
+     *)
+    (* Recode to 'out_buf': *)
+    let in_len = length - k in
+    let in_done, out_done, in_enc' =
+      recode ~in_enc:in_enc   ~in_buf:s        ~in_pos:k     ~in_len:in_len
+             ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0    ~out_len:size  
+             ~max_chars:size  ~subst:subst in
+    (* Collect the results: *)
+    let k' = k + in_done in
+    let s_done' = String.sub out_buf 0 out_done :: s_done in
+    (* Still something to do? *)
+    if k' < length then
+      recode_loop k' s_done' in_enc'
+    else
+      (* No: Concatenate s_done' to get the final result. *)
+      String.concat "" (List.rev s_done')
+  in
+
+  recode_loop 0 [] in_enc
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/29 00:46:41  gerd
+ *     New type for the Unicode to 8 bit translation table.
+ *     The Netmappings tables are now Lazy.t.
+ *
+ * Revision 1.1  2000/08/13 00:02:57  gerd
+ *     Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.ml):
+ * 
+ * Revision 1.5  2000/07/27 00:41:14  gerd
+ *     new 8 bit codes
+ *
+ * Revision 1.4  2000/07/04 22:11:41  gerd
+ *     Implemented the enhancements and extensions of
+ * rev. 1.4 of pxp_encoding.mli.
+ *
+ * Revision 1.3  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1  2000/05/20 20:30:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)