(* $Id$ * ---------------------------------------------------------------------- * *) module Str = Netstring_str;; module Base64 = struct let b64_pattern plus slash = [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z'; 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];; let rfc_pattern = b64_pattern '+' '/';; let url_pattern = b64_pattern '-' '/';; let encode_with_options b64 equal s pos len linelen crlf = (* encode using "base64". * 'b64': The encoding table, created by b64_pattern. * 'equal': The character that should be used instead of '=' in the original * encoding scheme. Pass '=' to get the original encoding scheme. * s, pos, len, linelen: See the interface description of encode_substring. *) assert (Array.length b64 = 64); if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then invalid_arg "Netencoding.Base64.encode_with_options"; if pos + len > String.length s then invalid_arg "Netencoding.Base64.encode_with_options"; let linelen = (linelen/4) * 4 in let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in (* l_t: length of the result without additional line endings *) let l_t' = if linelen < 4 then l_t else if l_t = 0 then 0 else let n_lines = ((l_t - 1) / linelen) + 1 in l_t + n_lines * (if crlf then 2 else 1) in (* l_t': length of the result with CRLF or LF characters *) let t = String.make l_t' equal in let j = ref 0 in let q = ref 0 in for k = 0 to len / 3 - 1 do let p = pos + 3*k in (* p >= pos >= 0: this is evident * p+2 < pos+len <= String.length s: * Because k <= len/3-1 * 3*k <= 3*(len/3-1) = len - 3 * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len * So it is proved that the following unsafe string accesses always * work. *) let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor (Char.code (String.unsafe_get s (p+1)) lsl 8) lor (Char.code (String.unsafe_get s (p+2))) in (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *) assert(!j + 3 < l_t'); String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18)); String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63)); String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63)); String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63)); j := !j + 4; if linelen > 3 then begin q := !q + 4; if !q + 4 > linelen then begin (* The next 4 characters won't fit on the current line. So insert * a line ending. *) if crlf then begin t.[ !j ] <- '\013'; t.[ !j+1 ] <- '\010'; j := !j + 2; end else begin t.[ !j ] <- '\010'; incr j end; q := 0; end; end; done; (* padding if needed: *) let m = len mod 3 in begin match m with 0 -> () | 1 -> let bits = Char.code (s.[pos + len - 1]) in t.[ !j ] <- b64.( bits lsr 2); t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4); j := !j + 4; q := !q + 4; | 2 -> let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor (Char.code (s.[pos + len - 1])) in t.[ !j ] <- b64.( bits lsr 10); t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f); t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f); j := !j + 4; q := !q + 4; | _ -> assert false end; (* If required, add another line end: *) if linelen > 3 & !q > 0 then begin if crlf then begin t.[ !j ] <- '\013'; t.[ !j+1 ] <- '\010'; j := !j + 2; end else begin t.[ !j ] <- '\010'; incr j end; end; t ;; let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = let l = match len with None -> String.length s - pos | Some x -> x in encode_with_options rfc_pattern '=' s pos l linelength crlf;; let encode_substring s ~pos ~len ~linelength ~crlf = encode_with_options rfc_pattern '=' s pos len linelength crlf;; let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = let l = match len with None -> String.length s - pos | Some x -> x in encode_with_options url_pattern '.' s pos l linelength crlf;; let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces = if len < 0 or pos < 0 or pos > String.length t then invalid_arg "Netencoding.Base64.decode_substring"; if pos + len > String.length t then invalid_arg "Netencoding.Base64.decode_substring"; (* Compute the number of effective characters l_t in 't'; * pad_chars: number of '=' characters at the end of the string. *) let l_t, pad_chars = if p_spaces then begin (* Count all non-whitespace characters: *) let c = ref 0 in let p = ref 0 in for i = pos to pos + len - 1 do match String.unsafe_get t i with (' '|'\t'|'\r'|'\n') -> () | ('='|'.') as ch -> if ch = '.' & not p_url then invalid_arg "Netencoding.Base64.decode_substring"; incr c; incr p; if !p > 2 then invalid_arg "Netencoding.Base64.decode_substring"; for j = i+1 to pos + len - 1 do match String.unsafe_get t j with (' '|'\t'|'\r'|'\n'|'.'|'=') -> () | _ -> (* Only another '=' or spaces allowed *) invalid_arg "Netencoding.Base64.decode_substring"; done | _ -> incr c done; if !c mod 4 <> 0 then invalid_arg "Netencoding.Base64.decode_substring"; !c, !p end else len, ( if len mod 4 <> 0 then invalid_arg "Netencoding.Base64.decode_substring"; if len > 0 then ( if String.sub t (len - 2) 2 = "==" or (p_url & String.sub t (len - 2) 2 = "..") then 2 else if String.sub t (len - 1) 1 = "=" or (p_url & String.sub t (len - 1) 1 = ".") then 1 else 0 ) else 0 ) in let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *) let s = String.create l_s in let decode_char c = match c with 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *) | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *) | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *) | '+' -> 62 | '-' -> if not p_url then invalid_arg "Netencoding.Base64.decode_substring"; 62 | '/' -> 63 | _ -> invalid_arg "Netencoding.Base64.decode_substring"; in (* Decode all but the last quartet: *) let cursor = ref pos in let rec next_char() = match t.[ !cursor ] with (' '|'\t'|'\r'|'\n') -> if p_spaces then (incr cursor; next_char()) else invalid_arg "Netencoding.Base64.decode_substring" | c -> incr cursor; c in if p_spaces then begin for k = 0 to l_t / 4 - 2 do let q = 3*k in let c0 = next_char() in let c1 = next_char() in let c2 = next_char() in let c3 = next_char() in let n0 = decode_char c0 in let n1 = decode_char c1 in let n2 = decode_char c2 in let n3 = decode_char c3 in let x0 = (n0 lsl 2) lor (n1 lsr 4) in let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in let x2 = ((n2 lsl 6) land 0xc0) lor n3 in String.unsafe_set s q (Char.chr x0); String.unsafe_set s (q+1) (Char.chr x1); String.unsafe_set s (q+2) (Char.chr x2); done; end else begin (* Much faster: *) for k = 0 to l_t / 4 - 2 do let p = pos + 4*k in let q = 3*k in let c0 = String.unsafe_get t p in let c1 = String.unsafe_get t (p + 1) in let c2 = String.unsafe_get t (p + 2) in let c3 = String.unsafe_get t (p + 3) in let n0 = decode_char c0 in let n1 = decode_char c1 in let n2 = decode_char c2 in let n3 = decode_char c3 in let x0 = (n0 lsl 2) lor (n1 lsr 4) in let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in let x2 = ((n2 lsl 6) land 0xc0) lor n3 in String.unsafe_set s q (Char.chr x0); String.unsafe_set s (q+1) (Char.chr x1); String.unsafe_set s (q+2) (Char.chr x2); done; cursor := pos + l_t - 4; end; (* Decode the last quartet: *) if l_t > 0 then begin let q = 3*(l_t / 4 - 1) in let c0 = next_char() in let c1 = next_char() in let c2 = next_char() in let c3 = next_char() in if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin let n0 = decode_char c0 in let n1 = decode_char c1 in let x0 = (n0 lsl 2) lor (n1 lsr 4) in s.[ q ] <- Char.chr x0; end else if (c3 = '=') or (p_url & c3 = '.') then begin let n0 = decode_char c0 in let n1 = decode_char c1 in let n2 = decode_char c2 in let x0 = (n0 lsl 2) lor (n1 lsr 4) in let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in s.[ q ] <- Char.chr x0; s.[ q+1 ] <- Char.chr x1; end else begin let n0 = decode_char c0 in let n1 = decode_char c1 in let n2 = decode_char c2 in let n3 = decode_char c3 in let x0 = (n0 lsl 2) lor (n1 lsr 4) in let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in let x2 = ((n2 lsl 6) land 0xc0) lor n3 in s.[ q ] <- Char.chr x0; s.[ q+1 ] <- Char.chr x1; s.[ q+2 ] <- Char.chr x2; end end; s ;; let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s = let l = match len with None -> String.length s - pos | Some x -> x in decode_substring s pos l url_variant accept_spaces;; let decode_ignore_spaces s = decode_substring s 0 (String.length s) true true;; end module QuotedPrintable = struct let encode_substring s ~pos ~len = if len < 0 or pos < 0 or pos > String.length s then invalid_arg "Netencoding.QuotedPrintable.encode_substring"; if pos + len > String.length s then invalid_arg "Netencoding.QuotedPrintable.encode_substring"; let rec count n i = if i < len then match String.unsafe_get s (pos+i) with ('\r'|'\n') -> count (n+1) (i+1) | ('\000'..'\031'|'\127'..'\255'| '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') -> count (n+3) (i+1) | ' ' -> (* Protect spaces only if they occur at the end of a line *) if i+1 < len then match s.[pos+i+1] with ('\r'|'\n') -> count (n+3) (i+1) | _ -> count (n+1) (i+1) else count (n+3) (i+1) | _ -> count (n+1) (i+1) else n in let l = count 0 0 in let t = String.create l in let hexdigit = [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in let k = ref 0 in let add_quoted c = t.[ !k ] <- '='; t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) in for i = 0 to len - 1 do match String.unsafe_get s i with ('\r'|'\n') as c -> String.unsafe_set t !k c; incr k | ('\000'..'\031'|'\127'..'\255'| '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c -> add_quoted c; k := !k + 3 | ' ' -> (* Protect spaces only if they occur at the end of a line *) if i+1 < len then match s.[pos+i+1] with ('\r'|'\n') -> add_quoted ' '; k := !k + 3; | _ -> String.unsafe_set t !k ' '; incr k else begin add_quoted ' '; k := !k + 3; end | c -> String.unsafe_set t !k c; incr k done; t ;; let encode ?(pos=0) ?len s = let l = match len with None -> String.length s - pos | Some x -> x in encode_substring s pos l;; let decode_substring s ~pos ~len = if len < 0 or pos < 0 or pos > String.length s then invalid_arg "Netencoding.QuotedPrintable.decode_substring"; if pos + len > String.length s then invalid_arg "Netencoding.QuotedPrintable.decode_substring"; let decode_hex c = match c with '0'..'9' -> Char.code c - 48 | 'A'..'F' -> Char.code c - 55 | 'a'..'f' -> Char.code c - 87 | _ -> invalid_arg "Netencoding.QuotedPrintable.decode_substring"; in let rec count n i = if i < len then match String.unsafe_get s (pos+i) with '=' -> if i+1 = len then (* A '=' at EOF is ignored *) count n (i+1) else if i+1 < len then match s.[pos+i+1] with '\r' -> (* Official soft break *) if i+2 < len & s.[pos+i+2] = '\n' then count n (i+3) else count n (i+2) | '\n' -> (* Inofficial soft break *) count n (i+2) | _ -> if i+2 >= len then invalid_arg "Netencoding.QuotedPrintable.decode_substring"; let _ = decode_hex s.[pos+i+1] in let _ = decode_hex s.[pos+i+2] in count (n+1) (i+3) else invalid_arg "Netencoding.QuotedPrintable.decode_substring" | _ -> count (n+1) (i+1) else n in let l = count 0 0 in let t = String.create l in let k = ref pos in let e = pos + len in let i = ref 0 in while !i < l do match String.unsafe_get s !k with '=' -> if !k+1 = e then (* A '=' at EOF is ignored *) () else if !k+1 < e then match s.[!k+1] with '\r' -> (* Official soft break *) if !k+2 < e & s.[!k+2] = '\n' then k := !k + 3 else k := !k + 2 | '\n' -> (* Inofficial soft break *) k := !k + 2 | _ -> if !k+2 >= e then invalid_arg "Netencoding.QuotedPrintable.decode_substring"; let x1 = decode_hex s.[!k+1] in let x2 = decode_hex s.[!k+2] in t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); k := !k + 3; incr i else invalid_arg "Netencoding.QuotedPrintable.decode_substring" | c -> String.unsafe_set t !i c; incr k; incr i done; t ;; let decode ?(pos=0) ?len s = let l = match len with None -> String.length s - pos | Some x -> x in decode_substring s pos l;; end module Q = struct let encode_substring s ~pos ~len = if len < 0 or pos < 0 or pos > String.length s then invalid_arg "Netencoding.Q.encode_substring"; if pos + len > String.length s then invalid_arg "Netencoding.Q.encode_substring"; let rec count n i = if i < len then match String.unsafe_get s (pos+i) with | ('A'..'Z'|'a'..'z'|'0'..'9') -> count (n+1) (i+1) | _ -> count (n+3) (i+1) else n in let l = count 0 0 in let t = String.create l in let hexdigit = [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in let k = ref 0 in let add_quoted c = t.[ !k ] <- '='; t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) in for i = 0 to len - 1 do match String.unsafe_get s i with | ('A'..'Z'|'a'..'z'|'0'..'9') as c -> String.unsafe_set t !k c; incr k | c -> add_quoted c; k := !k + 3 done; t ;; let encode ?(pos=0) ?len s = let l = match len with None -> String.length s - pos | Some x -> x in encode_substring s pos l;; let decode_substring s ~pos ~len = if len < 0 or pos < 0 or pos > String.length s then invalid_arg "Netencoding.Q.decode_substring"; if pos + len > String.length s then invalid_arg "Netencoding.Q.decode_substring"; let decode_hex c = match c with '0'..'9' -> Char.code c - 48 | 'A'..'F' -> Char.code c - 55 | 'a'..'f' -> Char.code c - 87 | _ -> invalid_arg "Netencoding.Q.decode_substring"; in let rec count n i = if i < len then match String.unsafe_get s (pos+i) with '=' -> if i+2 >= len then invalid_arg "Netencoding.Q.decode_substring"; let _ = decode_hex s.[pos+i+1] in let _ = decode_hex s.[pos+i+2] in count (n+1) (i+3) | _ -> (* including '_' *) count (n+1) (i+1) else n in let l = count 0 0 in let t = String.create l in let k = ref pos in let e = pos + len in let i = ref 0 in while !i < l do match String.unsafe_get s !k with '=' -> if !k+2 >= e then invalid_arg "Netencoding.Q.decode_substring"; let x1 = decode_hex s.[!k+1] in let x2 = decode_hex s.[!k+2] in t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); k := !k + 3; incr i | '_' -> String.unsafe_set t !i ' '; incr k; incr i | c -> String.unsafe_set t !i c; incr k; incr i done; t ;; let decode ?(pos=0) ?len s = let l = match len with None -> String.length s - pos | Some x -> x in decode_substring s pos l ;; end module Url = struct let hex_digits = [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];; let to_hex2 k = (* Converts k to a 2-digit hex string *) let s = String.create 2 in s.[0] <- hex_digits.( (k lsr 4) land 15 ); s.[1] <- hex_digits.( k land 15 ); s ;; let of_hex1 c = match c with ('0'..'9') -> Char.code c - Char.code '0' | ('A'..'F') -> Char.code c - Char.code 'A' + 10 | ('a'..'f') -> Char.code c - Char.code 'a' + 10 | _ -> raise Not_found ;; let url_encoding_re = Str.regexp "[^A-Za-z0-9$_.!*'(),-]";; let url_decoding_re = Str.regexp "\\+\\|%..\\|%.\\|%";; let encode s = Str.global_substitute url_encoding_re (fun r _ -> match Str.matched_string r s with " " -> "+" | x -> let k = Char.code(x.[0]) in "%" ^ to_hex2 k ) s ;; let decode s = let l = String.length s in Str.global_substitute url_decoding_re (fun r _ -> match Str.matched_string r s with | "+" -> " " | _ -> let i = Str.match_beginning r in (* Assertion: s.[i] = '%' *) if i+2 >= l then failwith "Cgi.decode"; let c1 = s.[i+1] in let c2 = s.[i+2] in begin try let k1 = of_hex1 c1 in let k2 = of_hex1 c2 in String.make 1 (Char.chr((k1 lsl 4) lor k2)) with Not_found -> failwith "Cgi.decode" end ) s ;; end module Html = struct let eref_re = Str.regexp "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;; let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;; let etable = [ "lt", "<"; "gt", ">"; "amp", "&"; "quot", "\""; (* Note: " is new in HTML-4.0, but it has been widely used * much earlier. *) "nbsp", "\160"; "iexcl", "\161"; "cent", "\162"; "pound", "\163"; "curren", "\164"; "yen", "\165"; "brvbar", "\166"; "sect", "\167"; "uml", "\168"; "copy", "\169"; "ordf", "\170"; "laquo", "\171"; "not", "\172"; "shy", "\173"; "reg", "\174"; "macr", "\175"; "deg", "\176"; "plusmn", "\177"; "sup2", "\178"; "sup3", "\179"; "acute", "\180"; "micro", "\181"; "para", "\182"; "middot", "\183"; "cedil", "\184"; "sup1", "\185"; "ordm", "\186"; "raquo", "\187"; "frac14", "\188"; "frac12", "\189"; "frac34", "\190"; "iquest", "\191"; "Agrave", "\192"; "Aacute", "\193"; "Acirc", "\194"; "Atilde", "\195"; "Auml", "\196"; "Aring", "\197"; "AElig", "\198"; "Ccedil", "\199"; "Egrave", "\200"; "Eacute", "\201"; "Ecirc", "\202"; "Euml", "\203"; "Igrave", "\204"; "Iacute", "\205"; "Icirc", "\206"; "Iuml", "\207"; "ETH", "\208"; "Ntilde", "\209"; "Ograve", "\210"; "Oacute", "\211"; "Ocirc", "\212"; "Otilde", "\213"; "Ouml", "\214"; "times", "\215"; "Oslash", "\216"; "Ugrave", "\217"; "Uacute", "\218"; "Ucirc", "\219"; "Uuml", "\220"; "Yacute", "\221"; "THORN", "\222"; "szlig", "\223"; "agrave", "\224"; "aacute", "\225"; "acirc", "\226"; "atilde", "\227"; "auml", "\228"; "aring", "\229"; "aelig", "\230"; "ccedil", "\231"; "egrave", "\232"; "eacute", "\233"; "ecirc", "\234"; "euml", "\235"; "igrave", "\236"; "iacute", "\237"; "icirc", "\238"; "iuml", "\239"; "eth", "\240"; "ntilde", "\241"; "ograve", "\242"; "oacute", "\243"; "ocirc", "\244"; "otilde", "\245"; "ouml", "\246"; "divide", "\247"; "oslash", "\248"; "ugrave", "\249"; "uacute", "\250"; "ucirc", "\251"; "uuml", "\252"; "yacute", "\253"; "thorn", "\254"; "yuml", "\255"; ] ;; let quick_etable = let ht = Hashtbl.create 50 in List.iter (fun (name,value) -> Hashtbl.add ht name value) etable; (* Entities to be decoded, but that must not be encoded: *) Hashtbl.add ht "apos" "'"; (* used in XML documents *) ht ;; let rev_etable = let a = Array.create 256 "" in List.iter (fun (name,value) -> a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable; for i = 0 to 8 do a.(i) <- "&#" ^ string_of_int i ^ ";" done; for i = 11 to 12 do a.(i) <- "&#" ^ string_of_int i ^ ";" done; for i = 14 to 31 do a.(i) <- "&#" ^ string_of_int i ^ ";" done; for i = 127 to 159 do a.(i) <- "&#" ^ string_of_int i ^ ";" done; a ;; let decode_to_latin1 s = Str.global_substitute eref_re (fun r _ -> let t = Str.matched_string r s in try let n = int_of_string(Str.matched_group r 2 s) in if n < 256 then String.make 1 (Char.chr n) else t with Not_found -> try let name = Str.matched_group r 3 s in try Hashtbl.find quick_etable name with Not_found -> t with Not_found -> assert false ) s ;; let encode_from_latin1 s = Str.global_substitute unsafe_re (fun r _ -> let t = Str.matched_string r s in let i = Char.code (t.[0]) in rev_etable.(i) ) s ;; end (* ====================================================================== * History: * * $Log$ * Revision 1.1 2000/11/17 09:57:27 lpadovan * Initial revision * * Revision 1.5 2000/06/25 22:34:43 gerd * Added labels to arguments. * * Revision 1.4 2000/06/25 21:15:48 gerd * Checked thread-safety. * * Revision 1.3 2000/03/03 17:03:16 gerd * Q encoding: CR and LF are quoted. * * Revision 1.2 2000/03/03 01:08:29 gerd * Added Netencoding.Html functions. * * Revision 1.1 2000/03/02 01:14:48 gerd * Initial revision. * * *)