X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnetconversion.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnetconversion.ml;h=e740654ade9805b86902a499bc6a2838f304f801;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git

diff --git a/helm/DEVEL/pxp/netstring/netconversion.ml b/helm/DEVEL/pxp/netstring/netconversion.ml
new file mode 100644
index 000000000..e740654ad
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netconversion.ml
@@ -0,0 +1,864 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code
+
+
+type encoding =
+  [  `Enc_utf8       (* UTF-8 *)
+  |  `Enc_java
+  |  `Enc_utf16      (* UTF-16 with unspecified endianess (restricted usage) *)
+  |  `Enc_utf16_le   (* UTF-16 little endian *)
+  |  `Enc_utf16_be   (* UTF-16 big endian *)
+  |  `Enc_usascii    (* US-ASCII (only 7 bit) *)
+  |  `Enc_iso88591   (* ISO-8859-1 *)
+  |  `Enc_iso88592   (* ISO-8859-2 *)
+  |  `Enc_iso88593   (* ISO-8859-3 *)
+  |  `Enc_iso88594   (* ISO-8859-4 *)
+  |  `Enc_iso88595   (* ISO-8859-5 *)
+  |  `Enc_iso88596   (* ISO-8859-6 *)
+  |  `Enc_iso88597   (* ISO-8859-7 *)
+  |  `Enc_iso88598   (* ISO-8859-8 *)
+  |  `Enc_iso88599   (* ISO-8859-9 *)
+  |  `Enc_iso885910  (* ISO-8859-10 *)
+  |  `Enc_iso885913  (* ISO-8859-13 *)
+  |  `Enc_iso885914  (* ISO-8859-14 *)
+  |  `Enc_iso885915  (* ISO-8859-15 *)
+  |  `Enc_koi8r      (* KOI8-R *)
+  |  `Enc_jis0201    (* JIS-0201 *)
+    (* Microsoft: *)
+  |  `Enc_windows1250  (* WINDOWS-1250 *)
+  |  `Enc_windows1251  (* WINDOWS-1251 *)
+  |  `Enc_windows1252  (* WINDOWS-1252 *)
+  |  `Enc_windows1253  (* WINDOWS-1253 *)
+  |  `Enc_windows1254  (* WINDOWS-1254 *)
+  |  `Enc_windows1255  (* WINDOWS-1255 *)
+  |  `Enc_windows1256  (* WINDOWS-1256 *)
+  |  `Enc_windows1257  (* WINDOWS-1257 *)
+  |  `Enc_windows1258  (* WINDOWS-1258 *)
+    (* IBM, ASCII-based: *)
+  |  `Enc_cp437
+  |  `Enc_cp737
+  |  `Enc_cp775
+  |  `Enc_cp850
+  |  `Enc_cp852
+  |  `Enc_cp855
+  |  `Enc_cp856
+  |  `Enc_cp857
+  |  `Enc_cp860
+  |  `Enc_cp861
+  |  `Enc_cp862
+  |  `Enc_cp863
+  |  `Enc_cp864
+  |  `Enc_cp865
+  |  `Enc_cp866
+  |  `Enc_cp869
+  |  `Enc_cp874
+  |  `Enc_cp1006
+   (* IBM, EBCDIC-based: *)
+  |  `Enc_cp037
+  |  `Enc_cp424
+  |  `Enc_cp500
+  |  `Enc_cp875
+  |  `Enc_cp1026
+   (* Adobe: *)
+  |  `Enc_adobe_standard_encoding
+  |  `Enc_adobe_symbol_encoding
+  |  `Enc_adobe_zapf_dingbats_encoding
+   (* Apple: *)
+  |  `Enc_macroman
+
+  ]
+;;
+
+
+let norm_enc_name e =
+  (* Removes some characters from e; uppercase *)
+  let e' = String.create (String.length e) in
+  let rec next i j =
+    if i < String.length e then
+      match e.[i] with
+	  ('-'|'_'|'.') -> next (i+1) j
+	| c             -> e'.[j] <- c; next (i+1) (j+1)
+    else
+      j
+  in
+  let l = next 0 0 in
+  String.uppercase(String.sub e' 0 l)
+;;
+
+
+let encoding_of_string e =
+  match norm_enc_name e with
+      ("UTF16"|"UCS2"|"ISO10646UCS2")                 -> `Enc_utf16
+    | "UTF16BE"                                       -> `Enc_utf16_be
+    | "UTF16LE"                                       -> `Enc_utf16_le
+    | "UTF8"                                          -> `Enc_utf8
+    | ("UTF8JAVA"|"JAVA")                             -> `Enc_java
+    | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii
+    | ("ISO88591"|"LATIN1"|"IBM819"|"CP819")          -> `Enc_iso88591
+    | ("ISO88592"|"LATIN2")                           -> `Enc_iso88592
+    | ("ISO88593"|"LATIN3")                           -> `Enc_iso88593
+    | ("ISO88594"|"LATIN4")                           -> `Enc_iso88594
+    | ("ISO88595"|"CYRILLIC")                         -> `Enc_iso88595
+    | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708")       -> `Enc_iso88596
+    | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597
+    | ("ISO88598"|"HEBREW")                           -> `Enc_iso88598
+    | ("ISO88599"|"LATIN5")                           -> `Enc_iso88599
+    | ("ISO885910"|"LATIN6")                          -> `Enc_iso885910
+    | "ISO885913"                                     -> `Enc_iso885913
+    | "ISO885914"                                     -> `Enc_iso885914
+    | "ISO885915"                                     -> `Enc_iso885915
+    | "KOI8R"                                         -> `Enc_koi8r
+    | ("JIS0201"|"JISX0201"|"X0201")                  -> `Enc_jis0201
+
+    | "WINDOWS1250"                                   -> `Enc_windows1250
+    | "WINDOWS1251"                                   -> `Enc_windows1251
+    | "WINDOWS1252"                                   -> `Enc_windows1252
+    | "WINDOWS1253"                                   -> `Enc_windows1253
+    | "WINDOWS1254"                                   -> `Enc_windows1254
+    | "WINDOWS1255"                                   -> `Enc_windows1255
+    | "WINDOWS1256"                                   -> `Enc_windows1256
+    | "WINDOWS1257"                                   -> `Enc_windows1257
+    | "WINDOWS1258"                                   -> `Enc_windows1258
+
+    | ("CP437"|"IBM437")                              -> `Enc_cp437
+    | ("CP737"|"IBM737")                              -> `Enc_cp737
+    | ("CP775"|"IBM775")                              -> `Enc_cp775
+    | ("CP850"|"IBM850")                              -> `Enc_cp850
+    | ("CP852"|"IBM852")                              -> `Enc_cp852
+    | ("CP855"|"IBM855")                              -> `Enc_cp855
+    | ("CP856"|"IBM856")                              -> `Enc_cp856
+    | ("CP857"|"IBM857")                              -> `Enc_cp857
+    | ("CP860"|"IBM860")                              -> `Enc_cp860
+    | ("CP861"|"IBM861")                              -> `Enc_cp861
+    | ("CP862"|"IBM862")                              -> `Enc_cp862
+    | ("CP863"|"IBM863")                              -> `Enc_cp863
+    | ("CP864"|"IBM864")                              -> `Enc_cp864
+    | ("CP865"|"IBM865")                              -> `Enc_cp865
+    | ("CP866"|"IBM866")                              -> `Enc_cp866
+    | ("CP869"|"IBM869")                              -> `Enc_cp869
+    | ("CP874"|"IBM874")                              -> `Enc_cp874
+    | ("CP1006"|"IBM1006")                            -> `Enc_cp1006
+
+    | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"|
+       "EBCDICCPNL")                                  -> `Enc_cp037
+    | ("CP424"|"IBM424"|"EBCDICCPHE")                 -> `Enc_cp424
+    | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH")    -> `Enc_cp500
+    | ("CP875"|"IBM875")                              -> `Enc_cp875
+    | ("CP1026"|"IBM1026")                            -> `Enc_cp1026
+
+    | "ADOBESTANDARDENCODING"       -> `Enc_adobe_standard_encoding
+    | "ADOBESYMBOLENCODING"         -> `Enc_adobe_symbol_encoding
+    | "ADOBEZAPFDINGBATSENCODING"   -> `Enc_adobe_zapf_dingbats_encoding
+
+    | "MACINTOSH"                   -> `Enc_macroman
+
+    | _ ->
+	failwith "Netconversion.encoding_of_string: unknown encoding"
+;;
+
+
+let string_of_encoding (e : encoding) =
+  (* If there is a "preferred MIME name", this name is returned (see IANA). *)
+  match e with
+      `Enc_utf16    -> "UTF-16"
+    | `Enc_utf16_be -> "UTF-16-BE"
+    | `Enc_utf16_le -> "UTF-16-LE"
+    | `Enc_utf8     -> "UTF-8"
+    | `Enc_java     -> "UTF-8-JAVA"
+    | `Enc_usascii  -> "US-ASCII"
+    | `Enc_iso88591 -> "ISO-8859-1"
+    | `Enc_iso88592 -> "ISO-8859-2"
+    | `Enc_iso88593 -> "ISO-8859-3"
+    | `Enc_iso88594 -> "ISO-8859-4"
+    | `Enc_iso88595 -> "ISO-8859-5"
+    | `Enc_iso88596 -> "ISO-8859-6"
+    | `Enc_iso88597 -> "ISO-8859-7"
+    | `Enc_iso88598 -> "ISO-8859-8"
+    | `Enc_iso88599 -> "ISO-8859-9"
+    | `Enc_iso885910 -> "ISO-8859-10"
+    | `Enc_iso885913 -> "ISO-8859-13"
+    | `Enc_iso885914 -> "ISO-8859-14"
+    | `Enc_iso885915 -> "ISO-8859-15"
+    | `Enc_koi8r     -> "KOI8-R"
+    | `Enc_jis0201   -> "JIS_X0201"
+    | `Enc_windows1250 -> "WINDOWS-1250"
+    | `Enc_windows1251 -> "WINDOWS-1251"
+    | `Enc_windows1252 -> "WINDOWS-1252"
+    | `Enc_windows1253 -> "WINDOWS-1253"
+    | `Enc_windows1254 -> "WINDOWS-1254"
+    | `Enc_windows1255 -> "WINDOWS-1255"
+    | `Enc_windows1256 -> "WINDOWS-1256"
+    | `Enc_windows1257 -> "WINDOWS-1257"
+    | `Enc_windows1258 -> "WINDOWS-1258"
+    | `Enc_cp437   -> "CP437"
+    | `Enc_cp737   -> "CP737"
+    | `Enc_cp775   -> "CP775"
+    | `Enc_cp850   -> "CP850"
+    | `Enc_cp852   -> "CP852"
+    | `Enc_cp855   -> "CP855"
+    | `Enc_cp856   -> "CP856"
+    | `Enc_cp857   -> "CP857"
+    | `Enc_cp860   -> "CP860"
+    | `Enc_cp861   -> "CP861"
+    | `Enc_cp862   -> "CP862"
+    | `Enc_cp863   -> "CP863"
+    | `Enc_cp864   -> "CP864"
+    | `Enc_cp865   -> "CP865"
+    | `Enc_cp866   -> "CP866"
+    | `Enc_cp869   -> "CP869"
+    | `Enc_cp874   -> "CP874"
+    | `Enc_cp1006  -> "CP1006"
+    | `Enc_cp037   -> "CP037"
+    | `Enc_cp424   -> "CP424"
+    | `Enc_cp500   -> "CP500"
+    | `Enc_cp875   -> "CP875"
+    | `Enc_cp1026  -> "CP1026"
+    | `Enc_adobe_standard_encoding      -> "ADOBE-STANDARD-ENCODING"
+    | `Enc_adobe_symbol_encoding        -> "ADOBE-SYMBOL-ENCODING"
+    | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING"
+    | `Enc_macroman                     -> "MACINTOSH"
+;;
+
+
+let read_iso88591 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p = Char.code s_in.[p_in + k_in] in
+      let n = write p k_out c_out in
+      if n < 0 then
+	k_in, k_out, `Enc_iso88591
+      else
+	scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_iso88591
+  in
+  scan 0 0 0
+;;
+
+
+let read_usascii write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p = Char.code s_in.[p_in + k_in] in
+      if p >= 0x80 then raise Malformed_code;
+      let n = write p k_out c_out in
+      if n < 0 then
+	k_in, k_out, `Enc_usascii
+      else
+	scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_usascii
+  in
+  scan 0 0 0
+;;
+
+
+let read_8bit m_to_unicode enc write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p_local = Char.code s_in.[p_in + k_in] in
+      let p_uni = Array.unsafe_get m_to_unicode p_local in
+      if p_uni < 0 then raise Malformed_code;
+      let n = write p_uni k_out c_out in
+      if n < 0 then
+	k_in, k_out, enc
+      else
+	scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, enc
+  in
+  scan 0 0 0
+;;
+
+
+let read_utf8 is_java write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let n_out, n_in =
+	match s_in.[p_in + k_in] with
+	    '\000' ->
+	      if is_java then raise Malformed_code;
+	      write 0 k_out c_out, 1
+	  | ('\001'..'\127' as c) ->
+	      write (Char.code c) k_out c_out, 1
+	  | ('\128'..'\223' as c) ->
+	      if k_in + 1 >= l_in then
+		-1, 0
+	      else begin
+		let n1 = Char.code c in
+		let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+		if is_java && (n1 = 0x80 && n2 = 0xc0) then
+		  write 0 k_out c_out, 2
+		else begin
+		  if n2 < 128 or n2 > 191 then raise Malformed_code;
+		  let p = ((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) in
+		  if p < 128 then raise Malformed_code;
+		  write p k_out c_out, 2
+		end
+	      end
+	  | ('\224'..'\239' as c) ->
+	      if k_in + 2 >= l_in then
+		-1, 0
+	      else begin
+		let n1 = Char.code c in
+		let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+		let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+		if n2 < 128 or n2 > 191 then raise Malformed_code;
+		if n3 < 128 or n3 > 191 then raise Malformed_code;
+		let p =
+		  ((n1 land 0b1111) lsl 12) lor
+		  ((n2 land 0b111111) lsl 6) lor
+		  (n3 land 0b111111)
+		in
+		if p < 0x800 then raise Malformed_code;
+		if (p >= 0xd800 && p < 0xe000) then
+		  (* Surrogate pairs are not supported in UTF-8 *)
+		  raise Malformed_code;
+		if (p >= 0xfffe && p <= 0xffff) then
+		  raise Malformed_code;
+		write p k_out c_out, 3
+	      end
+	  | ('\240'..'\247' as c) ->
+	      if k_in + 3 >= l_in then
+		-1, 0
+	      else begin
+		let n1 = Char.code c in
+		let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+		let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+		let n4 = Char.code (s_in.[p_in + k_in + 3]) in
+		if n2 < 128 or n2 > 191 then raise Malformed_code;
+		if n3 < 128 or n3 > 191 then raise Malformed_code;
+		if n4 < 128 or n4 > 191 then raise Malformed_code;
+		let p = ((n1 land 0b111) lsl 18) lor
+			((n2 land 0b111111) lsl 12) lor
+			((n3 land 0b111111) lsl 6) lor
+			(n4 land 0b111111)
+		in
+		if p < 0x10000 then raise Malformed_code;
+		if p >= 0x110000 then
+		  (* These code points are not supported. *)
+		  raise Malformed_code;
+		write p k_out c_out, 4
+	      end
+	  | _ ->
+	      (* Outside the valid range of XML characters *)
+	      raise Malformed_code;
+      in
+      (* n_out: number of written bytes; -1 means out buf is full
+       * n_in: number of read bytes; 0 means end of in buf reached
+       * n_in = 0  implies  n_out = -1
+       *)
+      if n_out < 0 then
+	k_in, k_out, `Enc_utf8
+      else
+	scan (k_in + n_in) (k_out + n_out) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_utf8
+  in
+  scan 0 0 0
+;;
+
+
+let surrogate_offset = 0x10000 - (0xD800 lsl 10) - 0xDC00;;
+	
+let read_utf16_le k_in_0 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in + 1 < l_in then begin
+      let p = (Char.code s_in.[p_in + k_in]) lor ((Char.code s_in.[p_in + k_in + 1]) lsl 8) in
+
+      if p >= 0xd800 & p < 0xe000 then begin
+	(* This is a surrogate pair. *)
+	if k_in + 3 < l_in then begin
+	  if p <= 0xdbff then begin
+	    let q = (Char.code s_in.[p_in + k_in + 2 ]) lor
+		    ((Char.code s_in.[p_in + k_in + 3]) lsl 8) in
+	    if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+	    let eff_p = (p lsl 10) + q + surrogate_offset in
+	    let n = write eff_p k_out c_out in
+	    if n < 0 then
+	      k_in, k_out, `Enc_utf16_le
+	    else
+	      scan (k_in + 4) (k_out + n) (c_out + 1)
+	  end
+	  else
+	    (* Malformed pair: *)
+	    raise Malformed_code;
+	end
+	else 
+	  (* Incomplete pair: *)
+	  k_in, k_out, `Enc_utf16_le
+      end
+
+      else
+	if p = 0xfffe then 
+	  (* Big endian byte order mark: It is illegal here *)
+	  raise Malformed_code
+	else begin
+	  (* A regular code point *)
+	  let n = write p k_out c_out in
+	  if n < 0 then
+	    k_in, k_out, `Enc_utf16_le
+	  else
+	    scan (k_in + 2) (k_out + n) (c_out + 1)
+	end
+    end
+    else
+      (* Incomplete character: *)
+      k_in, k_out, `Enc_utf16_le
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16_be k_in_0 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in + 1 < l_in then begin
+      let p = (Char.code s_in.[p_in + k_in + 1]) lor ((Char.code s_in.[p_in + k_in]) lsl 8) in
+
+      if p >= 0xd800 & p < 0xe000 then begin
+	(* This is a surrogate pair. *)
+	if k_in + 3 < l_in then begin
+	  if p <= 0xdbff then begin
+	    let q = (Char.code s_in.[p_in + k_in + 3 ]) lor
+		    ((Char.code s_in.[p_in + k_in + 2]) lsl 8) in
+	    if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+	    let eff_p = (p lsl 10) + q + surrogate_offset in
+	    let n = write eff_p k_out c_out in
+	    if n < 0 then
+	      k_in, k_out, `Enc_utf16_be
+	    else
+	      scan (k_in + 4) (k_out + n) (c_out + 1)
+	  end
+	  else
+	    (* Malformed pair: *)
+	    raise Malformed_code;
+	end
+	else 
+	  (* Incomplete pair: *)
+	  k_in, k_out, `Enc_utf16_be
+      end
+
+      else
+	if p = 0xfffe then
+	  (* Little endian byte order mark: It is illegal here *)
+	  raise Malformed_code
+	else begin
+	  (* A regular code point *)
+	  let n = write p k_out c_out in
+	  if n < 0 then
+	    k_in, k_out, `Enc_utf16_be
+	  else
+	    scan (k_in + 2) (k_out + n) (c_out + 1)
+	end
+
+    end
+    else
+      (* Incomplete character: *)
+      k_in, k_out, `Enc_utf16_be
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16 write s_in p_in l_in =
+  (* Expect a BOM at the beginning of the text *)
+  if l_in >= 2 then begin
+    let c0 = s_in.[p_in + 0] in
+    let c1 = s_in.[p_in + 1] in
+    if c0 = '\254' & c1 = '\255' then begin
+      (* 0xfeff as big endian *)
+      read_utf16_be 2 write s_in p_in l_in
+    end
+    else 
+      if c0 = '\255' & c1 = '\254' then begin
+	(* 0xfeff as little endian *)
+	read_utf16_le 2 write s_in p_in l_in
+      end
+      else
+	(* byte order mark missing *)
+	raise Malformed_code
+  end
+  else
+    0, 0, `Enc_utf16
+;;
+
+
+let write_iso88591 s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    if p > 255 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+	(* Enough space to store 'subst': *)
+	String.blit subst 0 s_out (k_out+p_out) l_subst;
+	l_subst
+      end
+      else
+	(* Not enough space: Stop this round of recoding *)
+	-1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_usascii s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    if p > 127 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+	(* Enough space to store 'subst': *)
+	String.blit subst 0 s_out (k_out+p_out) l_subst;
+	l_subst
+      end
+      else
+	(* Not enough space: Stop this round of recoding *)
+	-1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    let p' =
+      match Array.unsafe_get from_unicode (p land 255) with
+	  Netmappings.U_nil -> -1
+	| Netmappings.U_single (p0,q0) ->
+	    if p0 = p then q0 else -1
+	| Netmappings.U_list l ->
+	    (try List.assoc p l with Not_found -> -1)
+    in
+    if p' < 0 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+	(* Enough space to store 'subst': *)
+	String.blit subst 0 s_out (k_out+p_out) l_subst;
+	l_subst
+      end
+      else
+	(* Not enough space: Stop this round of recoding *)
+	-1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p';
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out =
+  if p <= 127 && (not is_java || p <> 0) then begin
+    if k_out < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+    else -1
+  end
+  else if p <= 0x7ff then begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xc0 lor (p lsr 6));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor (p land 0x3f));
+      2
+    end
+    else -1
+  end
+  else if p <= 0xffff then begin
+    (* Refuse writing surrogate pairs, and fffe, ffff *)
+    if (p >= 0xd800 & p < 0xe000) or (p >= 0xfffe) then
+      failwith "Netconversion.write_utf8";
+    if k_out + 2 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xe0 lor (p lsr 12));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 6) land 0x3f));
+      s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor (p land 0x3f));
+      3
+    end
+    else -1
+  end
+  else if p <= 0x10ffff then begin
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xf0 lor (p lsr 18));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 12) land 0x3f));
+      s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor ((p lsr 6)  land 0x3f));
+      s_out.[p_out + k_out + 3] <- Char.chr (0x80 lor (p land 0x3f));
+      4
+    end
+    else -1
+  end
+  else
+    (* Higher code points are not possible in XML: *)
+    failwith "Netconversion.write_utf8"
+;;
+
+
+let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_le";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = (p lsr 10) + 0xd800 in
+      let low  = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out    ] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out    ] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_be";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = (p lsr 10) + 0xd800 in
+      let low  = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out + 1] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out    ] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 3] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 2] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out + 1] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out    ] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let recode ~in_enc
+           ~in_buf
+	   ~in_pos
+	   ~in_len
+	   ~out_enc
+	   ~out_buf
+           ~out_pos
+	   ~out_len
+	   ~max_chars
+	   ~subst =
+  if (in_pos < 0  || in_len < 0  || in_pos  + in_len  > String.length in_buf ||
+      out_pos < 0 || out_len < 0 || out_pos + out_len > String.length out_buf)
+  then
+    invalid_arg "Netconversion.recode";
+
+  let reader =
+    match in_enc with
+	`Enc_iso88591 -> read_iso88591
+      | `Enc_usascii  -> read_usascii
+      | `Enc_utf8     -> read_utf8 false
+      | `Enc_java     -> read_utf8 true
+      | `Enc_utf16    -> read_utf16
+      | `Enc_utf16_le -> read_utf16_le 0
+      | `Enc_utf16_be -> read_utf16_be 0
+      | _             -> 
+	  (try
+	     let to_unicode' = Hashtbl.find Netmappings.to_unicode in_enc in
+	     let to_unicode =
+	       Netmappings.lock();
+	       Lazy.force to_unicode' in
+	     Netmappings.unlock();
+	     read_8bit to_unicode in_enc
+	   with
+	       Not_found ->
+		 failwith("Support for the encoding `" ^
+			  string_of_encoding in_enc ^ 
+			  "' has not been compiled into Netstring")
+	  )
+  in
+  let writer =
+    match out_enc with
+	`Enc_iso88591 -> write_iso88591  out_buf out_pos out_len max_chars subst
+      | `Enc_usascii  -> write_usascii   out_buf out_pos out_len max_chars subst
+      | `Enc_utf8     -> write_utf8 false 
+                                         out_buf out_pos out_len max_chars subst
+      | `Enc_java     -> write_utf8 true out_buf out_pos out_len max_chars subst
+      | `Enc_utf16    -> failwith "Netconversion.recode"
+      | `Enc_utf16_le -> write_utf16_le  out_buf out_pos out_len max_chars subst
+      | `Enc_utf16_be -> write_utf16_be  out_buf out_pos out_len max_chars subst
+      | _             -> 
+	  (try
+	     let from_unicode' = Hashtbl.find Netmappings.from_unicode out_enc 
+	     in
+	     let from_unicode =
+	       Netmappings.lock();
+	       Lazy.force from_unicode' in
+	     Netmappings.unlock();
+	     write_8bit from_unicode out_buf out_pos out_len max_chars subst
+	   with
+	       Not_found ->
+		 failwith("Support for the encoding `" ^
+			  string_of_encoding out_enc ^ 
+			  "' has not been compiled into Netstring")
+	  )
+  in
+  reader writer in_buf in_pos in_len
+;;
+
+
+let makechar enc p =
+  match enc with
+      `Enc_iso88591 -> 
+	if p > 255 then raise Not_found;
+	String.make 1 (Char.chr p)
+    | `Enc_usascii ->
+	if p > 127 then raise Not_found;
+	String.make 1 (Char.chr p)
+    | `Enc_utf8 ->
+	let s = String.create 4 in
+	let n = write_utf8 false s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+	String.sub s 0 n
+    | `Enc_java ->
+	let s = String.create 4 in
+	let n = write_utf8 true s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+	String.sub s 0 n
+    | `Enc_utf16_le ->
+	let s = String.create 4 in
+	let n = write_utf16_le s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+	String.sub s 0 n
+    | `Enc_utf16_be ->
+	let s = String.create 4 in
+	let n = write_utf16_be s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+	String.sub s 0 n
+    | `Enc_utf16 ->
+	failwith "Netconversion.makechar"
+    | _ ->
+	let s = String.create 1 in
+	let from_unicode' = 
+	  try
+	    Hashtbl.find Netmappings.from_unicode enc 
+	  with
+	      Not_found ->
+		failwith("Support for the encoding `" ^
+			 string_of_encoding enc ^ 
+			 "' has not been compiled into Netstring")
+	in
+	let from_unicode =
+	  Netmappings.lock();
+	  Lazy.force from_unicode' in
+	Netmappings.unlock();
+	let n =
+	  write_8bit from_unicode s 0 1 1 (fun _ -> raise Not_found) p 0 0 in
+	s
+;;
+
+
+let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
+
+  let length = String.length s in
+  let size = 1024 in
+  let out_buf = String.create size in
+
+  let rec recode_loop k s_done in_enc =
+    (* 'k' bytes of 's' have already been processed, and the result is in
+     * 's_done'.
+     *)
+    (* Recode to 'out_buf': *)
+    let in_len = length - k in
+    let in_done, out_done, in_enc' =
+      recode ~in_enc:in_enc   ~in_buf:s        ~in_pos:k     ~in_len:in_len
+             ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0    ~out_len:size  
+             ~max_chars:size  ~subst:subst in
+    (* Collect the results: *)
+    let k' = k + in_done in
+    let s_done' = String.sub out_buf 0 out_done :: s_done in
+    (* Still something to do? *)
+    if k' < length then
+      recode_loop k' s_done' in_enc'
+    else
+      (* No: Concatenate s_done' to get the final result. *)
+      String.concat "" (List.rev s_done')
+  in
+
+  recode_loop 0 [] in_enc
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/29 00:46:41  gerd
+ * 	New type for the Unicode to 8 bit translation table.
+ * 	The Netmappings tables are now Lazy.t.
+ *
+ * Revision 1.1  2000/08/13 00:02:57  gerd
+ * 	Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.ml):
+ * 
+ * Revision 1.5  2000/07/27 00:41:14  gerd
+ * 	new 8 bit codes
+ *
+ * Revision 1.4  2000/07/04 22:11:41  gerd
+ * 	Implemented the enhancements and extensions of
+ * rev. 1.4 of pxp_encoding.mli.
+ *
+ * Revision 1.3  2000/05/29 23:48:38  gerd
+ * 	Changed module names:
+ * 		Markup_aux          into Pxp_aux
+ * 		Markup_codewriter   into Pxp_codewriter
+ * 		Markup_document     into Pxp_document
+ * 		Markup_dtd          into Pxp_dtd
+ * 		Markup_entity       into Pxp_entity
+ * 		Markup_lexer_types  into Pxp_lexer_types
+ * 		Markup_reader       into Pxp_reader
+ * 		Markup_types        into Pxp_types
+ * 		Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2  2000/05/29 21:14:57  gerd
+ * 	Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1  2000/05/20 20:30:50  gerd
+ * 	Initial revision.
+ *
+ * 
+ *)