(* $Id$
 * ----------------------------------------------------------------------
 * Interface to the lazily evaluated conversion tables between 8 bit
 * encodings and Unicode, and to the lock hooks that guard forcing them.
 *)

type from_uni_list =
    U_nil
  | U_single of (int*int)
  | U_list of (int*int) list
;;
  (* A representation of (int*int) list that is optimized for the case that
   * lists with 0 and 1 elements are the most frequent cases:
   *   - U_nil       stands for the empty list
   *   - U_single p  stands for the one-element list [p]
   *   - U_list l    is the general case and carries an explicit list l
   *
   * The parentheses in U_single of (int*int) are significant: the
   * constructor carries ONE argument that is a pair, not two separate
   * int arguments. Do not remove them when editing this type.
   *)


val to_unicode : (Netconversion.encoding, int array Lazy.t) Hashtbl.t;;

val from_unicode : (Netconversion.encoding, from_uni_list array Lazy.t) Hashtbl.t;;
  (* These hashtables are used internally by the parser to store
   * the conversion tables from 8 bit encodings to Unicode and vice versa.
   * It is normally not necessary to access these tables; the
   * Netconversion module does it already for you.
   *
   * Both tables are keyed by the encoding and hold lazy arrays, so an
   * entry must be forced with Lazy.force before it can be indexed.
   * Hashtbl.find raises Not_found if the encoding has no entry.
   * In multi-threaded applications the Lazy.force must happen while the
   * lock obtained by the function lock (declared below) is held:
   *    lock ();
   *    let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
   *    unlock ();
   * NOTE(review): the sketch above does not release the lock if forcing
   * raises an exception - handle that case in real code.
   *
   * Specification of the conversion tables:
   *
   * to_unicode: maps an 8 bit code to Unicode, i.e.
   *    let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
   *    let unicode = m.(isocode)
   *    - This may be (-1) to indicate that the code point is not defined.
   *
   * from_unicode: maps Unicode to an 8 bit code, i.e.
   *    let m = Lazy.force (Hashtbl.find from_unicode `Enc_isoXXX) in
   *    let l = m.(unicode land 255)
   *    Now search in l the pair (unicode, isocode), and return isocode.
   *    Here l is a from_uni_list, i.e. the array is indexed by the low
   *    8 bits of the Unicode code point and every slot collects the pairs
   *    of all code points sharing these bits.
   *
   * Note: It is guaranteed that both arrays always have 256 elements.
   *)

val lock : unit -> unit
  (* In multi-threaded applications: obtains a lock which is required to
   * Lazy.force the values found in to_unicode and from_unicode.
   * In single-threaded applications: a NO-OP
   *
   * NOTE(review): whether the lock may be obtained again by a thread that
   * already holds it is not specified here - confirm in the implementation
   * before nesting calls.
   *)

val unlock : unit -> unit
  (* In multi-threaded applications: releases the lock which is required to
   * Lazy.force the values found in to_unicode and from_unicode.
   * In single-threaded applications: a NO-OP
   *)


val init_mt : (unit -> unit) -> (unit -> unit) -> unit
  (* Internally used; see netstring_mt.ml
   *
   * NOTE(review): judging from the signature, the two arguments are
   * presumably the functions to install as lock and unlock, in this
   * order - confirm against netstring_mt.ml. Not meant to be called by
   * user code.
   *)


(* ---------------------------------------- *)

(* The following comment was written when the conversion module belonged
 * to the PXP package (Polymorphic XML Parser).
 * The file names mentioned below (pxp_types.mli, pxp_types.ml) therefore
 * refer to the PXP sources. NOTE(review): in this package the encoding
 * type is Netconversion.encoding, as used by the tables declared above;
 * the steps presumably apply to that module now - confirm.
 *)

(* HOW TO ADD A NEW 8 BIT CODE:
 *
 * It is relatively simple to add a new 8 bit code to the system. This
 * means that the parser can read and write files with the new encoding;
 * this does not mean that the parser can represent the XML tree internally
 * by the new encoding.
 *
 * - Put a new unimap file into the "mappings" directory. The file format
 *   is simple; please look at the already existing files.
 *   The name of the file determines the internal name of the code:
 *   If the file is called <name>.unimap, the code will be called
 *   `Enc_<name>.
 *
 * - Extend the type "encoding" in pxp_types.mli and pxp_types.ml
 *
 * - Extend the two functions encoding_of_string and string_of_encoding
 *   in pxp_types.ml
 *
 * - Recompile the parser
 *
 * Every encoding consumes at least 3kB of memory, but this may be much more
 * if the code points are dispersed on the Unicode code space.
 *
 * Perhaps the addition of new codes will become even simpler in future
 * versions of PXP; but it is currently more important to support
 * non-8-bit codes, too.
 * Every contribution of new codes to PXP is welcome!
 *)


(* ======================================================================
 * History:
 *
 * $Log$
 * Revision 1.1  2000/11/17 09:57:28  lpadovan
 * Initial revision
 *
 * Revision 1.2  2000/08/29 00:47:24  gerd
 * 	New type for the conversion Unicode to 8bit.
 * 	Conversion tables are now lazy. Thus also mutexes are required.
 *
 * Revision 1.1  2000/08/13 00:02:57  gerd
 * 	Initial revision.
 *
 *
 * ======================================================================
 * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_mappings.mli):
 *
 * Revision 1.1  2000/07/27 00:40:02  gerd
 * 	Initial revision.
 *
 *
 *)