X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnetencoding.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnetencoding.ml;h=e87c4c397d58e4f8848462bce394b15d5d9a46cf;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/netencoding.ml b/helm/DEVEL/pxp/netstring/netencoding.ml new file mode 100644 index 000000000..e87c4c397 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netencoding.ml @@ -0,0 +1,903 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +module Str = Netstring_str;; + +module Base64 = struct + let b64_pattern plus slash = + [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; + 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z'; + 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; + 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z'; + '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];; + + + let rfc_pattern = b64_pattern '+' '/';; + let url_pattern = b64_pattern '-' '/';; + + let encode_with_options b64 equal s pos len linelen crlf = + (* encode using "base64". + * 'b64': The encoding table, created by b64_pattern. + * 'equal': The character that should be used instead of '=' in the original + * encoding scheme. Pass '=' to get the original encoding scheme. + * s, pos, len, linelen: See the interface description of encode_substring. + *) + assert (Array.length b64 = 64); + if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then + invalid_arg "Netencoding.Base64.encode_with_options"; + if pos + len > String.length s then + invalid_arg "Netencoding.Base64.encode_with_options"; + + let linelen = + (linelen/4) * 4 in + + let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in + (* l_t: length of the result without additional line endings *) + + let l_t' = + if linelen < 4 then + l_t + else + if l_t = 0 then 0 else + let n_lines = ((l_t - 1) / linelen) + 1 in + l_t + n_lines * (if crlf then 2 else 1) + in + (* l_t': length of the result with CRLF or LF characters *) + + let t = String.make l_t' equal in + let j = ref 0 in + let q = ref 0 in + for k = 0 to len / 3 - 1 do + let p = pos + 3*k in + (* p >= pos >= 0: this is evident + * p+2 < pos+len <= String.length s: + * Because k <= len/3-1 + * 3*k <= 3*(len/3-1) = len - 3 + * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len + * So it is proved that the following unsafe string accesses always + * work. + *) + let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor + (Char.code (String.unsafe_get s (p+1)) lsl 8) lor + (Char.code (String.unsafe_get s (p+2))) in + (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *) + assert(!j + 3 < l_t'); + String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18)); + String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63)); + String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63)); + String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63)); + j := !j + 4; + if linelen > 3 then begin + q := !q + 4; + if !q + 4 > linelen then begin + (* The next 4 characters won't fit on the current line. So insert + * a line ending. + *) + if crlf then begin + t.[ !j ] <- '\013'; + t.[ !j+1 ] <- '\010'; + j := !j + 2; + end + else begin + t.[ !j ] <- '\010'; + incr j + end; + q := 0; + end; + end; + done; + (* padding if needed: *) + let m = len mod 3 in + begin + match m with + 0 -> () + | 1 -> + let bits = Char.code (s.[pos + len - 1]) in + t.[ !j ] <- b64.( bits lsr 2); + t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4); + j := !j + 4; + q := !q + 4; + | 2 -> + let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor + (Char.code (s.[pos + len - 1])) in + t.[ !j ] <- b64.( bits lsr 10); + t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f); + t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f); + j := !j + 4; + q := !q + 4; + | _ -> assert false + end; + + (* If required, add another line end: *) + + if linelen > 3 & !q > 0 then begin + if crlf then begin + t.[ !j ] <- '\013'; + t.[ !j+1 ] <- '\010'; + j := !j + 2; + end + else begin + t.[ !j ] <- '\010'; + incr j + end; + end; + + t ;; + + + + let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_with_options rfc_pattern '=' s pos l linelength crlf;; + + + let encode_substring s ~pos ~len ~linelength ~crlf = + encode_with_options rfc_pattern '=' s pos len linelength crlf;; + + + let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_with_options url_pattern '.' s pos l linelength crlf;; + + + let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces = + if len < 0 or pos < 0 or pos > String.length t then + invalid_arg "Netencoding.Base64.decode_substring"; + if pos + len > String.length t then + invalid_arg "Netencoding.Base64.decode_substring"; + + (* Compute the number of effective characters l_t in 't'; + * pad_chars: number of '=' characters at the end of the string. + *) + let l_t, pad_chars = + if p_spaces then begin + (* Count all non-whitespace characters: *) + let c = ref 0 in + let p = ref 0 in + for i = pos to pos + len - 1 do + match String.unsafe_get t i with + (' '|'\t'|'\r'|'\n') -> () + | ('='|'.') as ch -> + if ch = '.' & not p_url then + invalid_arg "Netencoding.Base64.decode_substring"; + incr c; + incr p; + if !p > 2 then + invalid_arg "Netencoding.Base64.decode_substring"; + for j = i+1 to pos + len - 1 do + match String.unsafe_get t j with + (' '|'\t'|'\r'|'\n'|'.'|'=') -> () + | _ -> + (* Only another '=' or spaces allowed *) + invalid_arg "Netencoding.Base64.decode_substring"; + done + | _ -> incr c + done; + if !c mod 4 <> 0 then + invalid_arg "Netencoding.Base64.decode_substring"; + !c, !p + end + else + len, + ( if len mod 4 <> 0 then + invalid_arg "Netencoding.Base64.decode_substring"; + if len > 0 then ( + if String.sub t (len - 2) 2 = "==" or + (p_url & String.sub t (len - 2) 2 = "..") then 2 + else + if String.sub t (len - 1) 1 = "=" or + (p_url & String.sub t (len - 1) 1 = ".") then 1 + else + 0 + ) + else 0 + ) + in + + let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *) + let s = String.create l_s in + + let decode_char c = + match c with + 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *) + | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *) + | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *) + | '+' -> 62 + | '-' -> if not p_url then + invalid_arg "Netencoding.Base64.decode_substring"; + 62 + | '/' -> 63 + | _ -> invalid_arg "Netencoding.Base64.decode_substring"; + in + + (* Decode all but the last quartet: *) + + let cursor = ref pos in + let rec next_char() = + match t.[ !cursor ] with + (' '|'\t'|'\r'|'\n') -> + if p_spaces then (incr cursor; next_char()) + else invalid_arg "Netencoding.Base64.decode_substring" + | c -> + incr cursor; c + in + + if p_spaces then begin + for k = 0 to l_t / 4 - 2 do + let q = 3*k in + let c0 = next_char() in + let c1 = next_char() in + let c2 = next_char() in + let c3 = next_char() in + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let n3 = decode_char c3 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + let x2 = ((n2 lsl 6) land 0xc0) lor n3 in + String.unsafe_set s q (Char.chr x0); + String.unsafe_set s (q+1) (Char.chr x1); + String.unsafe_set s (q+2) (Char.chr x2); + done; + end + else begin + (* Much faster: *) + for k = 0 to l_t / 4 - 2 do + let p = pos + 4*k in + let q = 3*k in + let c0 = String.unsafe_get t p in + let c1 = String.unsafe_get t (p + 1) in + let c2 = String.unsafe_get t (p + 2) in + let c3 = String.unsafe_get t (p + 3) in + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let n3 = decode_char c3 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + let x2 = ((n2 lsl 6) land 0xc0) lor n3 in + String.unsafe_set s q (Char.chr x0); + String.unsafe_set s (q+1) (Char.chr x1); + String.unsafe_set s (q+2) (Char.chr x2); + done; + cursor := pos + l_t - 4; + end; + + (* Decode the last quartet: *) + + if l_t > 0 then begin + let q = 3*(l_t / 4 - 1) in + let c0 = next_char() in + let c1 = next_char() in + let c2 = next_char() in + let c3 = next_char() in + + if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + s.[ q ] <- Char.chr x0; + end + else + if (c3 = '=') or (p_url & c3 = '.') then begin + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + s.[ q ] <- Char.chr x0; + s.[ q+1 ] <- Char.chr x1; + end + else begin + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let n3 = decode_char c3 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + let x2 = ((n2 lsl 6) land 0xc0) lor n3 in + s.[ q ] <- Char.chr x0; + s.[ q+1 ] <- Char.chr x1; + s.[ q+2 ] <- Char.chr x2; + end + + end; + + s ;; + + + + let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s = + let l = match len with None -> String.length s - pos | Some x -> x in + decode_substring s pos l url_variant accept_spaces;; + + let decode_ignore_spaces s = + decode_substring s 0 (String.length s) true true;; + + +end + + + +module QuotedPrintable = struct + + let encode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.QuotedPrintable.encode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.QuotedPrintable.encode_substring"; + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + ('\r'|'\n') -> + count (n+1) (i+1) + | ('\000'..'\031'|'\127'..'\255'| + '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') -> + count (n+3) (i+1) + | ' ' -> + (* Protect spaces only if they occur at the end of a line *) + if i+1 < len then + match s.[pos+i+1] with + ('\r'|'\n') -> + count (n+3) (i+1) + | _ -> + count (n+1) (i+1) + else + count (n+3) (i+1) + | _ -> + count (n+1) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + + let hexdigit = + [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; + '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in + + let k = ref 0 in + + let add_quoted c = + t.[ !k ] <- '='; + t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); + t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) + in + + for i = 0 to len - 1 do + match String.unsafe_get s i with + ('\r'|'\n') as c -> + String.unsafe_set t !k c; + incr k + | ('\000'..'\031'|'\127'..'\255'| + '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c -> + add_quoted c; + k := !k + 3 + | ' ' -> + (* Protect spaces only if they occur at the end of a line *) + if i+1 < len then + match s.[pos+i+1] with + ('\r'|'\n') -> + add_quoted ' '; + k := !k + 3; + | _ -> + String.unsafe_set t !k ' '; + incr k + else begin + add_quoted ' '; + k := !k + 3; + end + | c -> + String.unsafe_set t !k c; + incr k + done; + + t ;; + + + let encode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_substring s pos l;; + + + + let decode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.QuotedPrintable.decode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.QuotedPrintable.decode_substring"; + + let decode_hex c = + match c with + '0'..'9' -> Char.code c - 48 + | 'A'..'F' -> Char.code c - 55 + | 'a'..'f' -> Char.code c - 87 + | _ -> + invalid_arg "Netencoding.QuotedPrintable.decode_substring"; + in + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + '=' -> + if i+1 = len then + (* A '=' at EOF is ignored *) + count n (i+1) + else + if i+1 < len then + match s.[pos+i+1] with + '\r' -> + (* Official soft break *) + if i+2 < len & s.[pos+i+2] = '\n' then + count n (i+3) + else + count n (i+2) + | '\n' -> + (* Inofficial soft break *) + count n (i+2) + | _ -> + if i+2 >= len then + invalid_arg + "Netencoding.QuotedPrintable.decode_substring"; + let _ = decode_hex s.[pos+i+1] in + let _ = decode_hex s.[pos+i+2] in + count (n+1) (i+3) + else + invalid_arg "Netencoding.QuotedPrintable.decode_substring" + | _ -> + count (n+1) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + let k = ref pos in + let e = pos + len in + let i = ref 0 in + + while !i < l do + match String.unsafe_get s !k with + '=' -> + if !k+1 = e then + (* A '=' at EOF is ignored *) + () + else + if !k+1 < e then + match s.[!k+1] with + '\r' -> + (* Official soft break *) + if !k+2 < e & s.[!k+2] = '\n' then + k := !k + 3 + else + k := !k + 2 + | '\n' -> + (* Inofficial soft break *) + k := !k + 2 + | _ -> + if !k+2 >= e then + invalid_arg + "Netencoding.QuotedPrintable.decode_substring"; + let x1 = decode_hex s.[!k+1] in + let x2 = decode_hex s.[!k+2] in + t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); + k := !k + 3; + incr i + else + invalid_arg "Netencoding.QuotedPrintable.decode_substring" + | c -> + String.unsafe_set t !i c; + incr k; + incr i + done; + + t ;; + + + let decode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + decode_substring s pos l;; + +end + + +module Q = struct + + let encode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.Q.encode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.Q.encode_substring"; + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + | ('A'..'Z'|'a'..'z'|'0'..'9') -> + count (n+1) (i+1) + | _ -> + count (n+3) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + + let hexdigit = + [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; + '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in + + let k = ref 0 in + + let add_quoted c = + t.[ !k ] <- '='; + t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); + t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) + in + + for i = 0 to len - 1 do + match String.unsafe_get s i with + | ('A'..'Z'|'a'..'z'|'0'..'9') as c -> + String.unsafe_set t !k c; + incr k + | c -> + add_quoted c; + k := !k + 3 + done; + + t ;; + + + let encode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_substring s pos l;; + + + + let decode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.Q.decode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.Q.decode_substring"; + + let decode_hex c = + match c with + '0'..'9' -> Char.code c - 48 + | 'A'..'F' -> Char.code c - 55 + | 'a'..'f' -> Char.code c - 87 + | _ -> + invalid_arg "Netencoding.Q.decode_substring"; + in + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + '=' -> + if i+2 >= len then + invalid_arg "Netencoding.Q.decode_substring"; + let _ = decode_hex s.[pos+i+1] in + let _ = decode_hex s.[pos+i+2] in + count (n+1) (i+3) + | _ -> (* including '_' *) + count (n+1) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + let k = ref pos in + let e = pos + len in + let i = ref 0 in + + while !i < l do + match String.unsafe_get s !k with + '=' -> + if !k+2 >= e then + invalid_arg "Netencoding.Q.decode_substring"; + let x1 = decode_hex s.[!k+1] in + let x2 = decode_hex s.[!k+2] in + t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); + k := !k + 3; + incr i + | '_' -> + String.unsafe_set t !i ' '; + incr k; + incr i + | c -> + String.unsafe_set t !i c; + incr k; + incr i + done; + + t ;; + + + let decode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + decode_substring s pos l ;; + +end + + +module Url = struct + let hex_digits = + [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; + '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];; + + let to_hex2 k = + (* Converts k to a 2-digit hex string *) + let s = String.create 2 in + s.[0] <- hex_digits.( (k lsr 4) land 15 ); + s.[1] <- hex_digits.( k land 15 ); + s ;; + + + let of_hex1 c = + match c with + ('0'..'9') -> Char.code c - Char.code '0' + | ('A'..'F') -> Char.code c - Char.code 'A' + 10 + | ('a'..'f') -> Char.code c - Char.code 'a' + 10 + | _ -> + raise Not_found ;; + + + + let url_encoding_re = + Str.regexp "[^A-Za-z0-9$_.!*'(),-]";; + + let url_decoding_re = + Str.regexp "\\+\\|%..\\|%.\\|%";; + + + let encode s = + Str.global_substitute + url_encoding_re + (fun r _ -> + match Str.matched_string r s with + " " -> "+" + | x -> + let k = Char.code(x.[0]) in + "%" ^ to_hex2 k + ) + s ;; + + + let decode s = + let l = String.length s in + Str.global_substitute + url_decoding_re + (fun r _ -> + match Str.matched_string r s with + | "+" -> " " + | _ -> + let i = Str.match_beginning r in + (* Assertion: s.[i] = '%' *) + if i+2 >= l then failwith "Cgi.decode"; + let c1 = s.[i+1] in + let c2 = s.[i+2] in + begin + try + let k1 = of_hex1 c1 in + let k2 = of_hex1 c2 in + String.make 1 (Char.chr((k1 lsl 4) lor k2)) + with + Not_found -> + failwith "Cgi.decode" + end + ) + s ;; + +end + + +module Html = struct + + let eref_re = + Str.regexp + "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;; + let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;; + + let etable = + [ "lt", "<"; + "gt", ">"; + "amp", "&"; + "quot", "\""; + (* Note: " is new in HTML-4.0, but it has been widely used + * much earlier. + *) + "nbsp", "\160"; + "iexcl", "\161"; + "cent", "\162"; + "pound", "\163"; + "curren", "\164"; + "yen", "\165"; + "brvbar", "\166"; + "sect", "\167"; + "uml", "\168"; + "copy", "\169"; + "ordf", "\170"; + "laquo", "\171"; + "not", "\172"; + "shy", "\173"; + "reg", "\174"; + "macr", "\175"; + "deg", "\176"; + "plusmn", "\177"; + "sup2", "\178"; + "sup3", "\179"; + "acute", "\180"; + "micro", "\181"; + "para", "\182"; + "middot", "\183"; + "cedil", "\184"; + "sup1", "\185"; + "ordm", "\186"; + "raquo", "\187"; + "frac14", "\188"; + "frac12", "\189"; + "frac34", "\190"; + "iquest", "\191"; + "Agrave", "\192"; + "Aacute", "\193"; + "Acirc", "\194"; + "Atilde", "\195"; + "Auml", "\196"; + "Aring", "\197"; + "AElig", "\198"; + "Ccedil", "\199"; + "Egrave", "\200"; + "Eacute", "\201"; + "Ecirc", "\202"; + "Euml", "\203"; + "Igrave", "\204"; + "Iacute", "\205"; + "Icirc", "\206"; + "Iuml", "\207"; + "ETH", "\208"; + "Ntilde", "\209"; + "Ograve", "\210"; + "Oacute", "\211"; + "Ocirc", "\212"; + "Otilde", "\213"; + "Ouml", "\214"; + "times", "\215"; + "Oslash", "\216"; + "Ugrave", "\217"; + "Uacute", "\218"; + "Ucirc", "\219"; + "Uuml", "\220"; + "Yacute", "\221"; + "THORN", "\222"; + "szlig", "\223"; + "agrave", "\224"; + "aacute", "\225"; + "acirc", "\226"; + "atilde", "\227"; + "auml", "\228"; + "aring", "\229"; + "aelig", "\230"; + "ccedil", "\231"; + "egrave", "\232"; + "eacute", "\233"; + "ecirc", "\234"; + "euml", "\235"; + "igrave", "\236"; + "iacute", "\237"; + "icirc", "\238"; + "iuml", "\239"; + "eth", "\240"; + "ntilde", "\241"; + "ograve", "\242"; + "oacute", "\243"; + "ocirc", "\244"; + "otilde", "\245"; + "ouml", "\246"; + "divide", "\247"; + "oslash", "\248"; + "ugrave", "\249"; + "uacute", "\250"; + "ucirc", "\251"; + "uuml", "\252"; + "yacute", "\253"; + "thorn", "\254"; + "yuml", "\255"; + ] ;; + + let quick_etable = + let ht = Hashtbl.create 50 in + List.iter (fun (name,value) -> Hashtbl.add ht name value) etable; + (* Entities to be decoded, but that must not be encoded: *) + Hashtbl.add ht "apos" "'"; (* used in XML documents *) + ht ;; + + let rev_etable = + let a = Array.create 256 "" in + List.iter (fun (name,value) -> + a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable; + for i = 0 to 8 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + for i = 11 to 12 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + for i = 14 to 31 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + for i = 127 to 159 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + a ;; + + let decode_to_latin1 s = + Str.global_substitute + eref_re + (fun r _ -> + let t = Str.matched_string r s in + try + let n = int_of_string(Str.matched_group r 2 s) in + if n < 256 then + String.make 1 (Char.chr n) + else + t + with + Not_found -> + try + let name = Str.matched_group r 3 s in + try + Hashtbl.find quick_etable name + with + Not_found -> + t + with + Not_found -> assert false + ) + s ;; + + let encode_from_latin1 s = + Str.global_substitute + unsafe_re + (fun r _ -> + let t = Str.matched_string r s in + let i = Char.code (t.[0]) in + rev_etable.(i) + ) + s ;; +end + + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.5 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.4 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.3 2000/03/03 17:03:16 gerd + * Q encoding: CR and LF are quoted. + * + * Revision 1.2 2000/03/03 01:08:29 gerd + * Added Netencoding.Html functions. + * + * Revision 1.1 2000/03/02 01:14:48 gerd + * Initial revision. + * + * + *)