helm/DEVEL/pxp/netstring/netmappings.mli

   1 (* $Id$
   2  * ----------------------------------------------------------------------
   3  *)
   4
   5 type from_uni_list =
   6     U_nil
   7   | U_single of (int*int)
   8   | U_list of (int*int) list
   9 ;;
  10   (* An (int*int) list specialized for the most frequent cases: U_nil is
  11    * the empty list, U_single a one-element list, U_list any other list.
  12    *)
  13
  14
  15 val to_unicode   : (Netconversion.encoding,
  16                     int array Lazy.t)           Hashtbl.t;;
  17
  18 val from_unicode : (Netconversion.encoding,
  19                     from_uni_list array Lazy.t) Hashtbl.t;;
  20   (* These hashtables are used internally by the parser to store
  21    * the conversion tables from 8 bit encodings to Unicode and vice versa.
  22    * It is normally not necessary to access these tables; the
  23    * Netconversion module does it already for you.
  24    *
  25    * Specification of the conversion tables:
  26    *
  27    * to_unicode: maps an 8 bit code to Unicode, i.e.
  28    *    let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
  29    *    let unicode = m.(isocode)
  30    *    - This may be (-1) to indicate that the code point is not defined.
  31    *
  32    * from_unicode: maps Unicode to an 8 bit code, i.e.
  33    *    let m = Lazy.force (Hashtbl.find from_unicode `Enc_isoXXX) in
  34    *    let l = m.(unicode land 255)
  35    *    Now search in l the pair (unicode, isocode), and return isocode.
  36    * Note: Both arrays are guaranteed to have 256 elements. In multi-threaded
  37    * applications, Lazy.force must be bracketed by lock () and unlock ().
  38    *)
  39
  40 val lock : unit -> unit
  41   (* In multi-threaded applications: obtains the lock that is required to
  42    * Lazy.force the values found in to_unicode and from_unicode.
  43    * Release it with unlock afterwards. In single-threaded applications: a no-op.
  44    *)
  45
  46 val unlock : unit -> unit
  47   (* In multi-threaded applications: releases the lock (obtained by lock) that
  48    * is required to Lazy.force the values found in to_unicode and from_unicode.
  49    * In single-threaded applications: a no-op.
  50    *)
  51
  52
  53 val init_mt : (unit -> unit) -> (unit -> unit) -> unit
  54   (* Internally used; see netstring_mt.ml. Presumably installs the lock/unlock functions -- TODO confirm. *)
  55
  56
  57 (* ---------------------------------------- *)
  58
  59 (* The following comment was written when the conversion module belonged
  60  * to the PXP package (Polymorphic XML Parser).
  61  *)
  62
  63 (* HOW TO ADD A NEW 8 BIT CODE:
  64  *
  65  * It is relatively simple to add a new 8 bit code to the system. This
  66  * means that the parser can read and write files with the new encoding;
  67  * this does not mean that the parser can represent the XML tree internally
  68  * by the new encoding.
  69  *
  70  * - Put a new unimap file into the "mappings" directory. The file format
  71  *   is simple; please look at the already existing files.
  72  *   The name of the file determines the internal name of the code:
  73  *   If the file is called <name>.unimap, the code will be called
  74  *   `Enc_<name>.
  75  *
  76  * - Extend the type "encoding" in pxp_types.mli and pxp_types.ml
  77  *
  78  * - Extend the two functions encoding_of_string and string_of_encoding
  79  *   in pxp_types.ml
  80  *
  81  * - Recompile the parser
  82  *
  83  * Every encoding consumes at least 3kB of memory, and possibly much more
  84  * if its code points are scattered across the Unicode code space.
  85  *
  86  * Perhaps the addition of new codes will become even simpler in future
  87  * versions of PXP; but it is currently more important to support
  88  * non-8-bit codes, too.
  89  * Every contribution of new codes to PXP is welcome!
  90  *)
  91
  92
  93 (* ======================================================================
  94  * History:
  95  *
  96  * $Log$
  97  * Revision 1.1  2000/11/17 09:57:28  lpadovan
  98  * Initial revision
  99  *
 100  * Revision 1.2  2000/08/29 00:47:24  gerd
 101  *      New type for the conversion Unicode to 8bit.
 102  *      Conversion tables are now lazy. Thus also mutexes are required.
 103  *
 104  * Revision 1.1  2000/08/13 00:02:57  gerd
 105  *      Initial revision.
 106  *
 107  *
 108  * ======================================================================
 109  * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_mappings.mli):
 110  *
 111  * Revision 1.1  2000/07/27 00:40:02  gerd
 112  *      Initial revision.
 113  *
 114  *
 115  *)