X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Ftools%2Funimap_to_ocaml%2Funimap_to_ocaml.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Ftools%2Funimap_to_ocaml%2Funimap_to_ocaml.ml;h=0000000000000000000000000000000000000000;hb=3ef089a4c58fbe429dd539af6215991ecbe11ee2;hp=14a89e9d986de436ee102905d0faab0e7279138c;hpb=1c7fb836e2af4f2f3d18afd0396701f2094265ff;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml deleted file mode 100644 index 14a89e9d9..000000000 --- a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml +++ /dev/null @@ -1,201 +0,0 @@ -(* $Id$ - * ---------------------------------------------------------------------- - * - *) - -open Printf;; - -let comment_re = Str.regexp "#.*$";; -let space_re = Str.regexp "[ \t\r\n]+";; - -let read_unimap_format_a fname f = - (* Reads a Unicode mapping in format A from a "local" code to Unicode. - * Returns a list of pairs (localcode, unicode). - *) - - let read_unimap_line() = - let s = input_line f in (* may raise End_of_file *) - let s' = Str.global_replace comment_re "" s in - let words = Str.split space_re s' in - match words with - [] -> raise Not_found - | [ localcode; unicode ] -> - int_of_string localcode, int_of_string unicode - | _ -> - failwith ("File " ^ fname ^ ": Do not know what to do with:\n" ^ s') - in - - let rec read_following_lines() = - try - let localcode, unicode = read_unimap_line() in - (* may raise End_of_file, Not_found *) - (localcode, unicode) :: read_following_lines() - with - Not_found -> read_following_lines() - | End_of_file -> [] - in - - read_following_lines() -;; - - -type from_uni_list = - U_nil - | U_single of (int * int) - | U_list of (int * int) list - -type from_unicode = - from_uni_list array;; - (* A hashtable with fixed size (256). A pair (unicode, localcode) is - * stored at the position unicode mod 256 in the array. - *) - - -let make_bijection unimap = - (* unimap: a list of pairs (localcode, unicode) - * returns a pair of arrays (m_to_unicode, m_from_unicode) with: - * - m_to_unicode.(localcode) = Some unicode, - * if the pair (localcode, unicode) exists - * m_to_unicode.(x) = None otherwise - * - m_from_unicode.(unicode lsr 8) = [ ...; (unicode,localcode); ... ] - *) - - let m_to_unicode = Array.create 256 None in - let m_from_unicode = Array.create 256 [] in - - List.iter - (fun (localcode, unicode) -> - assert(localcode < 256); - - (* Update m_to_unicode: *) - if m_to_unicode.(localcode) <> None then - failwith ("Local code point " ^ string_of_int localcode ^ - " mapped twice"); - m_to_unicode.(localcode) <- Some unicode; - - (* Update m_from_unicode: *) - let unilow = unicode land 255 in - if List.mem_assoc unicode (m_from_unicode.(unilow)) then - failwith ("Unicode code point " ^ string_of_int unicode ^ - " mapped twice"); - m_from_unicode.(unilow) <- - m_from_unicode.(unilow) @ [unicode,localcode]; - ) - unimap; - - m_to_unicode, m_from_unicode -;; - - -let to_unimap_as_string to_unimap = - let make_repr x = - match x with - None -> -1 - | Some u -> u - in - Marshal.to_string (Array.map make_repr to_unimap) [ Marshal.No_sharing ] -;; - - -let from_unimap_as_string from_unimap = - let make_repr l = - match l with - [] -> U_nil - | [u,l] -> U_single(u,l) - | _ -> U_list l - in - let m = Array.map make_repr from_unimap in - Marshal.to_string m [ Marshal.No_sharing ] -;; - - -let print_bijection f name m_to_unicode m_from_unicode = - (* Prints on file f this O'Caml code: - * let _to_unicode = ... - * let _from_unicode = ... - *) - fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n" - name - (String.escaped (to_unimap_as_string m_to_unicode)); - - fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n " - name - (String.escaped (from_unimap_as_string m_from_unicode)); -;; - - -let main() = - let files = ref [] in - let outch = ref (lazy stdout) in - Arg.parse - [ "-o", Arg.String (fun s -> outch := lazy (open_out s)), - " Write result to this file"] - (fun s -> files := !files @ [s]) - "usage: unimap_to_ocaml file.unimap ..."; - - (* First read in all unimaps: *) - let unimaps = - List.map - (fun filename -> - let mapname = Str.replace_first (Str.regexp "\.unimap$") "" - (Filename.basename filename) in - let f = open_in filename in - prerr_endline ("Reading " ^ filename); - let unimap = read_unimap_format_a filename f in - close_in f; - mapname, unimap - ) - !files - in - - (* Second compute all bijections: *) - let bijections = - List.map - (fun (mapname, unimap) -> - prerr_endline ("Processing " ^ mapname); - let to_unicode, from_unicode = make_bijection unimap in - mapname, to_unicode, from_unicode - ) - unimaps - in - - let out = Lazy.force !outch in - (* Third output all results: *) - output_string out "(* WARNING! This is a generated file! *)\n"; - - List.iter - (fun (mapname, to_unicode, from_unicode) -> - print_bijection out mapname to_unicode from_unicode) - bijections; - List.iter - (fun (mapname, _, _) -> - fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n" - mapname mapname; - fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n" - mapname mapname; - ) - (List.rev bijections); - fprintf out "();;\n"; - - close_out out -;; - - -main();; - -(* ====================================================================== - * History: - * - * $Log$ - * Revision 1.1 2000/11/17 09:57:29 lpadovan - * Initial revision - * - * Revision 1.3 2000/08/29 00:48:52 gerd - * Conversion tables are now stored in marshalled form. - * New type for the conversion table Unicode to 8bit. - * - * Revision 1.2 2000/08/12 23:54:56 gerd - * Initial revision. - * - * - *)