X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnetencoding.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnetencoding.ml;h=0000000000000000000000000000000000000000;hb=c7514aaa249a96c5fdd39b1123fbdb38d92f20b6;hp=e87c4c397d58e4f8848462bce394b15d5d9a46cf;hpb=1c7fb836e2af4f2f3d18afd0396701f2094265ff;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/netencoding.ml b/helm/DEVEL/pxp/netstring/netencoding.ml deleted file mode 100644 index e87c4c397..000000000 --- a/helm/DEVEL/pxp/netstring/netencoding.ml +++ /dev/null @@ -1,903 +0,0 @@ -(* $Id$ - * ---------------------------------------------------------------------- - * - *) - - -module Str = Netstring_str;; - -module Base64 = struct - let b64_pattern plus slash = - [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; - 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z'; - 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; - 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z'; - '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];; - - - let rfc_pattern = b64_pattern '+' '/';; - let url_pattern = b64_pattern '-' '/';; - - let encode_with_options b64 equal s pos len linelen crlf = - (* encode using "base64". - * 'b64': The encoding table, created by b64_pattern. - * 'equal': The character that should be used instead of '=' in the original - * encoding scheme. Pass '=' to get the original encoding scheme. - * s, pos, len, linelen: See the interface description of encode_substring. - *) - assert (Array.length b64 = 64); - if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then - invalid_arg "Netencoding.Base64.encode_with_options"; - if pos + len > String.length s then - invalid_arg "Netencoding.Base64.encode_with_options"; - - let linelen = - (linelen/4) * 4 in - - let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in - (* l_t: length of the result without additional line endings *) - - let l_t' = - if linelen < 4 then - l_t - else - if l_t = 0 then 0 else - let n_lines = ((l_t - 1) / linelen) + 1 in - l_t + n_lines * (if crlf then 2 else 1) - in - (* l_t': length of the result with CRLF or LF characters *) - - let t = String.make l_t' equal in - let j = ref 0 in - let q = ref 0 in - for k = 0 to len / 3 - 1 do - let p = pos + 3*k in - (* p >= pos >= 0: this is evident - * p+2 < pos+len <= String.length s: - * Because k <= len/3-1 - * 3*k <= 3*(len/3-1) = len - 3 - * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len - * So it is proved that the following unsafe string accesses always - * work. - *) - let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor - (Char.code (String.unsafe_get s (p+1)) lsl 8) lor - (Char.code (String.unsafe_get s (p+2))) in - (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *) - assert(!j + 3 < l_t'); - String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18)); - String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63)); - String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63)); - String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63)); - j := !j + 4; - if linelen > 3 then begin - q := !q + 4; - if !q + 4 > linelen then begin - (* The next 4 characters won't fit on the current line. So insert - * a line ending. - *) - if crlf then begin - t.[ !j ] <- '\013'; - t.[ !j+1 ] <- '\010'; - j := !j + 2; - end - else begin - t.[ !j ] <- '\010'; - incr j - end; - q := 0; - end; - end; - done; - (* padding if needed: *) - let m = len mod 3 in - begin - match m with - 0 -> () - | 1 -> - let bits = Char.code (s.[pos + len - 1]) in - t.[ !j ] <- b64.( bits lsr 2); - t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4); - j := !j + 4; - q := !q + 4; - | 2 -> - let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor - (Char.code (s.[pos + len - 1])) in - t.[ !j ] <- b64.( bits lsr 10); - t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f); - t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f); - j := !j + 4; - q := !q + 4; - | _ -> assert false - end; - - (* If required, add another line end: *) - - if linelen > 3 & !q > 0 then begin - if crlf then begin - t.[ !j ] <- '\013'; - t.[ !j+1 ] <- '\010'; - j := !j + 2; - end - else begin - t.[ !j ] <- '\010'; - incr j - end; - end; - - t ;; - - - - let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = - let l = match len with None -> String.length s - pos | Some x -> x in - encode_with_options rfc_pattern '=' s pos l linelength crlf;; - - - let encode_substring s ~pos ~len ~linelength ~crlf = - encode_with_options rfc_pattern '=' s pos len linelength crlf;; - - - let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = - let l = match len with None -> String.length s - pos | Some x -> x in - encode_with_options url_pattern '.' s pos l linelength crlf;; - - - let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces = - if len < 0 or pos < 0 or pos > String.length t then - invalid_arg "Netencoding.Base64.decode_substring"; - if pos + len > String.length t then - invalid_arg "Netencoding.Base64.decode_substring"; - - (* Compute the number of effective characters l_t in 't'; - * pad_chars: number of '=' characters at the end of the string. - *) - let l_t, pad_chars = - if p_spaces then begin - (* Count all non-whitespace characters: *) - let c = ref 0 in - let p = ref 0 in - for i = pos to pos + len - 1 do - match String.unsafe_get t i with - (' '|'\t'|'\r'|'\n') -> () - | ('='|'.') as ch -> - if ch = '.' & not p_url then - invalid_arg "Netencoding.Base64.decode_substring"; - incr c; - incr p; - if !p > 2 then - invalid_arg "Netencoding.Base64.decode_substring"; - for j = i+1 to pos + len - 1 do - match String.unsafe_get t j with - (' '|'\t'|'\r'|'\n'|'.'|'=') -> () - | _ -> - (* Only another '=' or spaces allowed *) - invalid_arg "Netencoding.Base64.decode_substring"; - done - | _ -> incr c - done; - if !c mod 4 <> 0 then - invalid_arg "Netencoding.Base64.decode_substring"; - !c, !p - end - else - len, - ( if len mod 4 <> 0 then - invalid_arg "Netencoding.Base64.decode_substring"; - if len > 0 then ( - if String.sub t (len - 2) 2 = "==" or - (p_url & String.sub t (len - 2) 2 = "..") then 2 - else - if String.sub t (len - 1) 1 = "=" or - (p_url & String.sub t (len - 1) 1 = ".") then 1 - else - 0 - ) - else 0 - ) - in - - let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *) - let s = String.create l_s in - - let decode_char c = - match c with - 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *) - | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *) - | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *) - | '+' -> 62 - | '-' -> if not p_url then - invalid_arg "Netencoding.Base64.decode_substring"; - 62 - | '/' -> 63 - | _ -> invalid_arg "Netencoding.Base64.decode_substring"; - in - - (* Decode all but the last quartet: *) - - let cursor = ref pos in - let rec next_char() = - match t.[ !cursor ] with - (' '|'\t'|'\r'|'\n') -> - if p_spaces then (incr cursor; next_char()) - else invalid_arg "Netencoding.Base64.decode_substring" - | c -> - incr cursor; c - in - - if p_spaces then begin - for k = 0 to l_t / 4 - 2 do - let q = 3*k in - let c0 = next_char() in - let c1 = next_char() in - let c2 = next_char() in - let c3 = next_char() in - let n0 = decode_char c0 in - let n1 = decode_char c1 in - let n2 = decode_char c2 in - let n3 = decode_char c3 in - let x0 = (n0 lsl 2) lor (n1 lsr 4) in - let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in - let x2 = ((n2 lsl 6) land 0xc0) lor n3 in - String.unsafe_set s q (Char.chr x0); - String.unsafe_set s (q+1) (Char.chr x1); - String.unsafe_set s (q+2) (Char.chr x2); - done; - end - else begin - (* Much faster: *) - for k = 0 to l_t / 4 - 2 do - let p = pos + 4*k in - let q = 3*k in - let c0 = String.unsafe_get t p in - let c1 = String.unsafe_get t (p + 1) in - let c2 = String.unsafe_get t (p + 2) in - let c3 = String.unsafe_get t (p + 3) in - let n0 = decode_char c0 in - let n1 = decode_char c1 in - let n2 = decode_char c2 in - let n3 = decode_char c3 in - let x0 = (n0 lsl 2) lor (n1 lsr 4) in - let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in - let x2 = ((n2 lsl 6) land 0xc0) lor n3 in - String.unsafe_set s q (Char.chr x0); - String.unsafe_set s (q+1) (Char.chr x1); - String.unsafe_set s (q+2) (Char.chr x2); - done; - cursor := pos + l_t - 4; - end; - - (* Decode the last quartet: *) - - if l_t > 0 then begin - let q = 3*(l_t / 4 - 1) in - let c0 = next_char() in - let c1 = next_char() in - let c2 = next_char() in - let c3 = next_char() in - - if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin - let n0 = decode_char c0 in - let n1 = decode_char c1 in - let x0 = (n0 lsl 2) lor (n1 lsr 4) in - s.[ q ] <- Char.chr x0; - end - else - if (c3 = '=') or (p_url & c3 = '.') then begin - let n0 = decode_char c0 in - let n1 = decode_char c1 in - let n2 = decode_char c2 in - let x0 = (n0 lsl 2) lor (n1 lsr 4) in - let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in - s.[ q ] <- Char.chr x0; - s.[ q+1 ] <- Char.chr x1; - end - else begin - let n0 = decode_char c0 in - let n1 = decode_char c1 in - let n2 = decode_char c2 in - let n3 = decode_char c3 in - let x0 = (n0 lsl 2) lor (n1 lsr 4) in - let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in - let x2 = ((n2 lsl 6) land 0xc0) lor n3 in - s.[ q ] <- Char.chr x0; - s.[ q+1 ] <- Char.chr x1; - s.[ q+2 ] <- Char.chr x2; - end - - end; - - s ;; - - - - let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s = - let l = match len with None -> String.length s - pos | Some x -> x in - decode_substring s pos l url_variant accept_spaces;; - - let decode_ignore_spaces s = - decode_substring s 0 (String.length s) true true;; - - -end - - - -module QuotedPrintable = struct - - let encode_substring s ~pos ~len = - - if len < 0 or pos < 0 or pos > String.length s then - invalid_arg "Netencoding.QuotedPrintable.encode_substring"; - if pos + len > String.length s then - invalid_arg "Netencoding.QuotedPrintable.encode_substring"; - - let rec count n i = - if i < len then - match String.unsafe_get s (pos+i) with - ('\r'|'\n') -> - count (n+1) (i+1) - | ('\000'..'\031'|'\127'..'\255'| - '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') -> - count (n+3) (i+1) - | ' ' -> - (* Protect spaces only if they occur at the end of a line *) - if i+1 < len then - match s.[pos+i+1] with - ('\r'|'\n') -> - count (n+3) (i+1) - | _ -> - count (n+1) (i+1) - else - count (n+3) (i+1) - | _ -> - count (n+1) (i+1) - else - n - in - - let l = count 0 0 in - let t = String.create l in - - let hexdigit = - [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; - '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in - - let k = ref 0 in - - let add_quoted c = - t.[ !k ] <- '='; - t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); - t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) - in - - for i = 0 to len - 1 do - match String.unsafe_get s i with - ('\r'|'\n') as c -> - String.unsafe_set t !k c; - incr k - | ('\000'..'\031'|'\127'..'\255'| - '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c -> - add_quoted c; - k := !k + 3 - | ' ' -> - (* Protect spaces only if they occur at the end of a line *) - if i+1 < len then - match s.[pos+i+1] with - ('\r'|'\n') -> - add_quoted ' '; - k := !k + 3; - | _ -> - String.unsafe_set t !k ' '; - incr k - else begin - add_quoted ' '; - k := !k + 3; - end - | c -> - String.unsafe_set t !k c; - incr k - done; - - t ;; - - - let encode ?(pos=0) ?len s = - let l = match len with None -> String.length s - pos | Some x -> x in - encode_substring s pos l;; - - - - let decode_substring s ~pos ~len = - - if len < 0 or pos < 0 or pos > String.length s then - invalid_arg "Netencoding.QuotedPrintable.decode_substring"; - if pos + len > String.length s then - invalid_arg "Netencoding.QuotedPrintable.decode_substring"; - - let decode_hex c = - match c with - '0'..'9' -> Char.code c - 48 - | 'A'..'F' -> Char.code c - 55 - | 'a'..'f' -> Char.code c - 87 - | _ -> - invalid_arg "Netencoding.QuotedPrintable.decode_substring"; - in - - let rec count n i = - if i < len then - match String.unsafe_get s (pos+i) with - '=' -> - if i+1 = len then - (* A '=' at EOF is ignored *) - count n (i+1) - else - if i+1 < len then - match s.[pos+i+1] with - '\r' -> - (* Official soft break *) - if i+2 < len & s.[pos+i+2] = '\n' then - count n (i+3) - else - count n (i+2) - | '\n' -> - (* Inofficial soft break *) - count n (i+2) - | _ -> - if i+2 >= len then - invalid_arg - "Netencoding.QuotedPrintable.decode_substring"; - let _ = decode_hex s.[pos+i+1] in - let _ = decode_hex s.[pos+i+2] in - count (n+1) (i+3) - else - invalid_arg "Netencoding.QuotedPrintable.decode_substring" - | _ -> - count (n+1) (i+1) - else - n - in - - let l = count 0 0 in - let t = String.create l in - let k = ref pos in - let e = pos + len in - let i = ref 0 in - - while !i < l do - match String.unsafe_get s !k with - '=' -> - if !k+1 = e then - (* A '=' at EOF is ignored *) - () - else - if !k+1 < e then - match s.[!k+1] with - '\r' -> - (* Official soft break *) - if !k+2 < e & s.[!k+2] = '\n' then - k := !k + 3 - else - k := !k + 2 - | '\n' -> - (* Inofficial soft break *) - k := !k + 2 - | _ -> - if !k+2 >= e then - invalid_arg - "Netencoding.QuotedPrintable.decode_substring"; - let x1 = decode_hex s.[!k+1] in - let x2 = decode_hex s.[!k+2] in - t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); - k := !k + 3; - incr i - else - invalid_arg "Netencoding.QuotedPrintable.decode_substring" - | c -> - String.unsafe_set t !i c; - incr k; - incr i - done; - - t ;; - - - let decode ?(pos=0) ?len s = - let l = match len with None -> String.length s - pos | Some x -> x in - decode_substring s pos l;; - -end - - -module Q = struct - - let encode_substring s ~pos ~len = - - if len < 0 or pos < 0 or pos > String.length s then - invalid_arg "Netencoding.Q.encode_substring"; - if pos + len > String.length s then - invalid_arg "Netencoding.Q.encode_substring"; - - let rec count n i = - if i < len then - match String.unsafe_get s (pos+i) with - | ('A'..'Z'|'a'..'z'|'0'..'9') -> - count (n+1) (i+1) - | _ -> - count (n+3) (i+1) - else - n - in - - let l = count 0 0 in - let t = String.create l in - - let hexdigit = - [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; - '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in - - let k = ref 0 in - - let add_quoted c = - t.[ !k ] <- '='; - t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); - t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) - in - - for i = 0 to len - 1 do - match String.unsafe_get s i with - | ('A'..'Z'|'a'..'z'|'0'..'9') as c -> - String.unsafe_set t !k c; - incr k - | c -> - add_quoted c; - k := !k + 3 - done; - - t ;; - - - let encode ?(pos=0) ?len s = - let l = match len with None -> String.length s - pos | Some x -> x in - encode_substring s pos l;; - - - - let decode_substring s ~pos ~len = - - if len < 0 or pos < 0 or pos > String.length s then - invalid_arg "Netencoding.Q.decode_substring"; - if pos + len > String.length s then - invalid_arg "Netencoding.Q.decode_substring"; - - let decode_hex c = - match c with - '0'..'9' -> Char.code c - 48 - | 'A'..'F' -> Char.code c - 55 - | 'a'..'f' -> Char.code c - 87 - | _ -> - invalid_arg "Netencoding.Q.decode_substring"; - in - - let rec count n i = - if i < len then - match String.unsafe_get s (pos+i) with - '=' -> - if i+2 >= len then - invalid_arg "Netencoding.Q.decode_substring"; - let _ = decode_hex s.[pos+i+1] in - let _ = decode_hex s.[pos+i+2] in - count (n+1) (i+3) - | _ -> (* including '_' *) - count (n+1) (i+1) - else - n - in - - let l = count 0 0 in - let t = String.create l in - let k = ref pos in - let e = pos + len in - let i = ref 0 in - - while !i < l do - match String.unsafe_get s !k with - '=' -> - if !k+2 >= e then - invalid_arg "Netencoding.Q.decode_substring"; - let x1 = decode_hex s.[!k+1] in - let x2 = decode_hex s.[!k+2] in - t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); - k := !k + 3; - incr i - | '_' -> - String.unsafe_set t !i ' '; - incr k; - incr i - | c -> - String.unsafe_set t !i c; - incr k; - incr i - done; - - t ;; - - - let decode ?(pos=0) ?len s = - let l = match len with None -> String.length s - pos | Some x -> x in - decode_substring s pos l ;; - -end - - -module Url = struct - let hex_digits = - [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; - '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];; - - let to_hex2 k = - (* Converts k to a 2-digit hex string *) - let s = String.create 2 in - s.[0] <- hex_digits.( (k lsr 4) land 15 ); - s.[1] <- hex_digits.( k land 15 ); - s ;; - - - let of_hex1 c = - match c with - ('0'..'9') -> Char.code c - Char.code '0' - | ('A'..'F') -> Char.code c - Char.code 'A' + 10 - | ('a'..'f') -> Char.code c - Char.code 'a' + 10 - | _ -> - raise Not_found ;; - - - - let url_encoding_re = - Str.regexp "[^A-Za-z0-9$_.!*'(),-]";; - - let url_decoding_re = - Str.regexp "\\+\\|%..\\|%.\\|%";; - - - let encode s = - Str.global_substitute - url_encoding_re - (fun r _ -> - match Str.matched_string r s with - " " -> "+" - | x -> - let k = Char.code(x.[0]) in - "%" ^ to_hex2 k - ) - s ;; - - - let decode s = - let l = String.length s in - Str.global_substitute - url_decoding_re - (fun r _ -> - match Str.matched_string r s with - | "+" -> " " - | _ -> - let i = Str.match_beginning r in - (* Assertion: s.[i] = '%' *) - if i+2 >= l then failwith "Cgi.decode"; - let c1 = s.[i+1] in - let c2 = s.[i+2] in - begin - try - let k1 = of_hex1 c1 in - let k2 = of_hex1 c2 in - String.make 1 (Char.chr((k1 lsl 4) lor k2)) - with - Not_found -> - failwith "Cgi.decode" - end - ) - s ;; - -end - - -module Html = struct - - let eref_re = - Str.regexp - "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;; - let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;; - - let etable = - [ "lt", "<"; - "gt", ">"; - "amp", "&"; - "quot", "\""; - (* Note: " is new in HTML-4.0, but it has been widely used - * much earlier. - *) - "nbsp", "\160"; - "iexcl", "\161"; - "cent", "\162"; - "pound", "\163"; - "curren", "\164"; - "yen", "\165"; - "brvbar", "\166"; - "sect", "\167"; - "uml", "\168"; - "copy", "\169"; - "ordf", "\170"; - "laquo", "\171"; - "not", "\172"; - "shy", "\173"; - "reg", "\174"; - "macr", "\175"; - "deg", "\176"; - "plusmn", "\177"; - "sup2", "\178"; - "sup3", "\179"; - "acute", "\180"; - "micro", "\181"; - "para", "\182"; - "middot", "\183"; - "cedil", "\184"; - "sup1", "\185"; - "ordm", "\186"; - "raquo", "\187"; - "frac14", "\188"; - "frac12", "\189"; - "frac34", "\190"; - "iquest", "\191"; - "Agrave", "\192"; - "Aacute", "\193"; - "Acirc", "\194"; - "Atilde", "\195"; - "Auml", "\196"; - "Aring", "\197"; - "AElig", "\198"; - "Ccedil", "\199"; - "Egrave", "\200"; - "Eacute", "\201"; - "Ecirc", "\202"; - "Euml", "\203"; - "Igrave", "\204"; - "Iacute", "\205"; - "Icirc", "\206"; - "Iuml", "\207"; - "ETH", "\208"; - "Ntilde", "\209"; - "Ograve", "\210"; - "Oacute", "\211"; - "Ocirc", "\212"; - "Otilde", "\213"; - "Ouml", "\214"; - "times", "\215"; - "Oslash", "\216"; - "Ugrave", "\217"; - "Uacute", "\218"; - "Ucirc", "\219"; - "Uuml", "\220"; - "Yacute", "\221"; - "THORN", "\222"; - "szlig", "\223"; - "agrave", "\224"; - "aacute", "\225"; - "acirc", "\226"; - "atilde", "\227"; - "auml", "\228"; - "aring", "\229"; - "aelig", "\230"; - "ccedil", "\231"; - "egrave", "\232"; - "eacute", "\233"; - "ecirc", "\234"; - "euml", "\235"; - "igrave", "\236"; - "iacute", "\237"; - "icirc", "\238"; - "iuml", "\239"; - "eth", "\240"; - "ntilde", "\241"; - "ograve", "\242"; - "oacute", "\243"; - "ocirc", "\244"; - "otilde", "\245"; - "ouml", "\246"; - "divide", "\247"; - "oslash", "\248"; - "ugrave", "\249"; - "uacute", "\250"; - "ucirc", "\251"; - "uuml", "\252"; - "yacute", "\253"; - "thorn", "\254"; - "yuml", "\255"; - ] ;; - - let quick_etable = - let ht = Hashtbl.create 50 in - List.iter (fun (name,value) -> Hashtbl.add ht name value) etable; - (* Entities to be decoded, but that must not be encoded: *) - Hashtbl.add ht "apos" "'"; (* used in XML documents *) - ht ;; - - let rev_etable = - let a = Array.create 256 "" in - List.iter (fun (name,value) -> - a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable; - for i = 0 to 8 do - a.(i) <- "&#" ^ string_of_int i ^ ";" - done; - for i = 11 to 12 do - a.(i) <- "&#" ^ string_of_int i ^ ";" - done; - for i = 14 to 31 do - a.(i) <- "&#" ^ string_of_int i ^ ";" - done; - for i = 127 to 159 do - a.(i) <- "&#" ^ string_of_int i ^ ";" - done; - a ;; - - let decode_to_latin1 s = - Str.global_substitute - eref_re - (fun r _ -> - let t = Str.matched_string r s in - try - let n = int_of_string(Str.matched_group r 2 s) in - if n < 256 then - String.make 1 (Char.chr n) - else - t - with - Not_found -> - try - let name = Str.matched_group r 3 s in - try - Hashtbl.find quick_etable name - with - Not_found -> - t - with - Not_found -> assert false - ) - s ;; - - let encode_from_latin1 s = - Str.global_substitute - unsafe_re - (fun r _ -> - let t = Str.matched_string r s in - let i = Char.code (t.[0]) in - rev_etable.(i) - ) - s ;; -end - - - -(* ====================================================================== - * History: - * - * $Log$ - * Revision 1.1 2000/11/17 09:57:27 lpadovan - * Initial revision - * - * Revision 1.5 2000/06/25 22:34:43 gerd - * Added labels to arguments. - * - * Revision 1.4 2000/06/25 21:15:48 gerd - * Checked thread-safety. - * - * Revision 1.3 2000/03/03 17:03:16 gerd - * Q encoding: CR and LF are quoted. - * - * Revision 1.2 2000/03/03 01:08:29 gerd - * Added Netencoding.Html functions. - * - * Revision 1.1 2000/03/02 01:14:48 gerd - * Initial revision. - * - * - *)