+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Printf;;
-
-let comment_re = Str.regexp "#.*$";;
-let space_re = Str.regexp "[ \t\r\n]+";;
-
-let read_unimap_format_a fname f =
- (* Reads a Unicode mapping in format A from a "local" code to Unicode.
- * Returns a list of pairs (localcode, unicode).
- *)
-
- let read_unimap_line() =
- let s = input_line f in (* may raise End_of_file *)
- let s' = Str.global_replace comment_re "" s in
- let words = Str.split space_re s' in
- match words with
- [] -> raise Not_found
- | [ localcode; unicode ] ->
- int_of_string localcode, int_of_string unicode
- | _ ->
- failwith ("File " ^ fname ^ ": Do not know what to do with:\n" ^ s')
- in
-
- let rec read_following_lines() =
- try
- let localcode, unicode = read_unimap_line() in
- (* may raise End_of_file, Not_found *)
- (localcode, unicode) :: read_following_lines()
- with
- Not_found -> read_following_lines()
- | End_of_file -> []
- in
-
- read_following_lines()
-;;
-
-
-type from_uni_list =
- U_nil
- | U_single of (int * int)
- | U_list of (int * int) list
-
-type from_unicode =
- from_uni_list array;;
- (* A hashtable with fixed size (256). A pair (unicode, localcode) is
- * stored at the position unicode mod 256 in the array.
- *)
-
-
-let make_bijection unimap =
- (* unimap: a list of pairs (localcode, unicode)
- * returns a pair of arrays (m_to_unicode, m_from_unicode) with:
- * - m_to_unicode.(localcode) = Some unicode,
- * if the pair (localcode, unicode) exists
- * m_to_unicode.(x) = None otherwise
- * - m_from_unicode.(unicode lsr 8) = [ ...; (unicode,localcode); ... ]
- *)
-
- let m_to_unicode = Array.create 256 None in
- let m_from_unicode = Array.create 256 [] in
-
- List.iter
- (fun (localcode, unicode) ->
- assert(localcode < 256);
-
- (* Update m_to_unicode: *)
- if m_to_unicode.(localcode) <> None then
- failwith ("Local code point " ^ string_of_int localcode ^
- " mapped twice");
- m_to_unicode.(localcode) <- Some unicode;
-
- (* Update m_from_unicode: *)
- let unilow = unicode land 255 in
- if List.mem_assoc unicode (m_from_unicode.(unilow)) then
- failwith ("Unicode code point " ^ string_of_int unicode ^
- " mapped twice");
- m_from_unicode.(unilow) <-
- m_from_unicode.(unilow) @ [unicode,localcode];
- )
- unimap;
-
- m_to_unicode, m_from_unicode
-;;
-
-
-let to_unimap_as_string to_unimap =
- let make_repr x =
- match x with
- None -> -1
- | Some u -> u
- in
- Marshal.to_string (Array.map make_repr to_unimap) [ Marshal.No_sharing ]
-;;
-
-
-let from_unimap_as_string from_unimap =
- let make_repr l =
- match l with
- [] -> U_nil
- | [u,l] -> U_single(u,l)
- | _ -> U_list l
- in
- let m = Array.map make_repr from_unimap in
- Marshal.to_string m [ Marshal.No_sharing ]
-;;
-
-
-let print_bijection f name m_to_unicode m_from_unicode =
- (* Prints on file f this O'Caml code:
- * let <name>_to_unicode = ...
- * let <name>_from_unicode = ...
- *)
- fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n"
- name
- (String.escaped (to_unimap_as_string m_to_unicode));
-
- fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n "
- name
- (String.escaped (from_unimap_as_string m_from_unicode));
-;;
-
-
-let main() =
- let files = ref [] in
- let outch = ref (lazy stdout) in
- Arg.parse
- [ "-o", Arg.String (fun s -> outch := lazy (open_out s)),
- " <file> Write result to this file"]
- (fun s -> files := !files @ [s])
- "usage: unimap_to_ocaml file.unimap ...";
-
- (* First read in all unimaps: *)
- let unimaps =
- List.map
- (fun filename ->
- let mapname = Str.replace_first (Str.regexp "\.unimap$") ""
- (Filename.basename filename) in
- let f = open_in filename in
- prerr_endline ("Reading " ^ filename);
- let unimap = read_unimap_format_a filename f in
- close_in f;
- mapname, unimap
- )
- !files
- in
-
- (* Second compute all bijections: *)
- let bijections =
- List.map
- (fun (mapname, unimap) ->
- prerr_endline ("Processing " ^ mapname);
- let to_unicode, from_unicode = make_bijection unimap in
- mapname, to_unicode, from_unicode
- )
- unimaps
- in
-
- let out = Lazy.force !outch in
- (* Third output all results: *)
- output_string out "(* WARNING! This is a generated file! *)\n";
-
- List.iter
- (fun (mapname, to_unicode, from_unicode) ->
- print_bijection out mapname to_unicode from_unicode)
- bijections;
- List.iter
- (fun (mapname, _, _) ->
- fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n"
- mapname mapname;
- fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n"
- mapname mapname;
- )
- (List.rev bijections);
- fprintf out "();;\n";
-
- close_out out
-;;
-
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/29 00:48:52 gerd
- * Conversion tables are now stored in marshalled form.
- * New type for the conversion table Unicode to 8bit.
- *
- * Revision 1.2 2000/08/12 23:54:56 gerd
- * Initial revision.
- *
- *
- *)