2 * ----------------------------------------------------------------------
7 module Str = Netstring_str;;
10 let b64_pattern plus slash =
11 [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
12 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
13 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
14 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
15 '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;
18 let rfc_pattern = b64_pattern '+' '/';;
19 let url_pattern = b64_pattern '-' '/';;
21 let encode_with_options b64 equal s pos len linelen crlf =
22 (* encode using "base64".
23 * 'b64': The encoding table, created by b64_pattern.
24 * 'equal': The character that should be used instead of '=' in the original
25 * encoding scheme. Pass '=' to get the original encoding scheme.
26 * s, pos, len, linelen: See the interface description of encode_substring.
28 assert (Array.length b64 = 64);
29 if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
30 invalid_arg "Netencoding.Base64.encode_with_options";
31 if pos + len > String.length s then
32 invalid_arg "Netencoding.Base64.encode_with_options";
37 let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
38 (* l_t: length of the result without additional line endings *)
44 if l_t = 0 then 0 else
45 let n_lines = ((l_t - 1) / linelen) + 1 in
46 l_t + n_lines * (if crlf then 2 else 1)
48 (* l_t': length of the result with CRLF or LF characters *)
50 let t = String.make l_t' equal in
53 for k = 0 to len / 3 - 1 do
55 (* p >= pos >= 0: this is evident
56 * p+2 < pos+len <= String.length s:
57 * Because k <= len/3-1
58 * 3*k <= 3*(len/3-1) = len - 3
59 * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
60 * So it is proved that the following unsafe string accesses always
63 let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor
64 (Char.code (String.unsafe_get s (p+1)) lsl 8) lor
65 (Char.code (String.unsafe_get s (p+2))) in
66 (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
67 assert(!j + 3 < l_t');
68 String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18));
69 String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
70 String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63));
71 String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63));
73 if linelen > 3 then begin
75 if !q + 4 > linelen then begin
76 (* The next 4 characters won't fit on the current line. So insert
92 (* padding if needed: *)
98 let bits = Char.code (s.[pos + len - 1]) in
99 t.[ !j ] <- b64.( bits lsr 2);
100 t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
104 let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
105 (Char.code (s.[pos + len - 1])) in
106 t.[ !j ] <- b64.( bits lsr 10);
107 t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f);
108 t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f);
114 (* If required, add another line end: *)
116 if linelen > 3 & !q > 0 then begin
119 t.[ !j+1 ] <- '\010';
132 let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
133 let l = match len with None -> String.length s - pos | Some x -> x in
134 encode_with_options rfc_pattern '=' s pos l linelength crlf;;
137 let encode_substring s ~pos ~len ~linelength ~crlf =
138 encode_with_options rfc_pattern '=' s pos len linelength crlf;;
141 let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
142 let l = match len with None -> String.length s - pos | Some x -> x in
143 encode_with_options url_pattern '.' s pos l linelength crlf;;
146 let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
147 if len < 0 or pos < 0 or pos > String.length t then
148 invalid_arg "Netencoding.Base64.decode_substring";
149 if pos + len > String.length t then
150 invalid_arg "Netencoding.Base64.decode_substring";
152 (* Compute the number of effective characters l_t in 't';
153 * pad_chars: number of '=' characters at the end of the string.
156 if p_spaces then begin
157 (* Count all non-whitespace characters: *)
160 for i = pos to pos + len - 1 do
161 match String.unsafe_get t i with
162 (' '|'\t'|'\r'|'\n') -> ()
164 if ch = '.' & not p_url then
165 invalid_arg "Netencoding.Base64.decode_substring";
169 invalid_arg "Netencoding.Base64.decode_substring";
170 for j = i+1 to pos + len - 1 do
171 match String.unsafe_get t j with
172 (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
174 (* Only another '=' or spaces allowed *)
175 invalid_arg "Netencoding.Base64.decode_substring";
179 if !c mod 4 <> 0 then
180 invalid_arg "Netencoding.Base64.decode_substring";
185 ( if len mod 4 <> 0 then
186 invalid_arg "Netencoding.Base64.decode_substring";
188 if String.sub t (len - 2) 2 = "==" or
189 (p_url & String.sub t (len - 2) 2 = "..") then 2
191 if String.sub t (len - 1) 1 = "=" or
192 (p_url & String.sub t (len - 1) 1 = ".") then 1
200 let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *)
201 let s = String.create l_s in
205 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *)
206 | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *)
207 | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *)
209 | '-' -> if not p_url then
210 invalid_arg "Netencoding.Base64.decode_substring";
213 | _ -> invalid_arg "Netencoding.Base64.decode_substring";
216 (* Decode all but the last quartet: *)
218 let cursor = ref pos in
219 let rec next_char() =
220 match t.[ !cursor ] with
221 (' '|'\t'|'\r'|'\n') ->
222 if p_spaces then (incr cursor; next_char())
223 else invalid_arg "Netencoding.Base64.decode_substring"
228 if p_spaces then begin
229 for k = 0 to l_t / 4 - 2 do
231 let c0 = next_char() in
232 let c1 = next_char() in
233 let c2 = next_char() in
234 let c3 = next_char() in
235 let n0 = decode_char c0 in
236 let n1 = decode_char c1 in
237 let n2 = decode_char c2 in
238 let n3 = decode_char c3 in
239 let x0 = (n0 lsl 2) lor (n1 lsr 4) in
240 let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
241 let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
242 String.unsafe_set s q (Char.chr x0);
243 String.unsafe_set s (q+1) (Char.chr x1);
244 String.unsafe_set s (q+2) (Char.chr x2);
249 for k = 0 to l_t / 4 - 2 do
252 let c0 = String.unsafe_get t p in
253 let c1 = String.unsafe_get t (p + 1) in
254 let c2 = String.unsafe_get t (p + 2) in
255 let c3 = String.unsafe_get t (p + 3) in
256 let n0 = decode_char c0 in
257 let n1 = decode_char c1 in
258 let n2 = decode_char c2 in
259 let n3 = decode_char c3 in
260 let x0 = (n0 lsl 2) lor (n1 lsr 4) in
261 let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
262 let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
263 String.unsafe_set s q (Char.chr x0);
264 String.unsafe_set s (q+1) (Char.chr x1);
265 String.unsafe_set s (q+2) (Char.chr x2);
267 cursor := pos + l_t - 4;
270 (* Decode the last quartet: *)
272 if l_t > 0 then begin
273 let q = 3*(l_t / 4 - 1) in
274 let c0 = next_char() in
275 let c1 = next_char() in
276 let c2 = next_char() in
277 let c3 = next_char() in
279 if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
280 let n0 = decode_char c0 in
281 let n1 = decode_char c1 in
282 let x0 = (n0 lsl 2) lor (n1 lsr 4) in
283 s.[ q ] <- Char.chr x0;
286 if (c3 = '=') or (p_url & c3 = '.') then begin
287 let n0 = decode_char c0 in
288 let n1 = decode_char c1 in
289 let n2 = decode_char c2 in
290 let x0 = (n0 lsl 2) lor (n1 lsr 4) in
291 let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
292 s.[ q ] <- Char.chr x0;
293 s.[ q+1 ] <- Char.chr x1;
296 let n0 = decode_char c0 in
297 let n1 = decode_char c1 in
298 let n2 = decode_char c2 in
299 let n3 = decode_char c3 in
300 let x0 = (n0 lsl 2) lor (n1 lsr 4) in
301 let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
302 let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
303 s.[ q ] <- Char.chr x0;
304 s.[ q+1 ] <- Char.chr x1;
305 s.[ q+2 ] <- Char.chr x2;
314 let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
315 let l = match len with None -> String.length s - pos | Some x -> x in
316 decode_substring s pos l url_variant accept_spaces;;
318 let decode_ignore_spaces s =
319 decode_substring s 0 (String.length s) true true;;
326 module QuotedPrintable = struct
328 let encode_substring s ~pos ~len =
330 if len < 0 or pos < 0 or pos > String.length s then
331 invalid_arg "Netencoding.QuotedPrintable.encode_substring";
332 if pos + len > String.length s then
333 invalid_arg "Netencoding.QuotedPrintable.encode_substring";
337 match String.unsafe_get s (pos+i) with
340 | ('\000'..'\031'|'\127'..'\255'|
341 '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
344 (* Protect spaces only if they occur at the end of a line *)
346 match s.[pos+i+1] with
360 let t = String.create l in
363 [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
364 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
370 t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
371 t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
374 for i = 0 to len - 1 do
375 match String.unsafe_get s i with
377 String.unsafe_set t !k c;
379 | ('\000'..'\031'|'\127'..'\255'|
380 '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
384 (* Protect spaces only if they occur at the end of a line *)
386 match s.[pos+i+1] with
391 String.unsafe_set t !k ' ';
398 String.unsafe_set t !k c;
405 let encode ?(pos=0) ?len s =
406 let l = match len with None -> String.length s - pos | Some x -> x in
407 encode_substring s pos l;;
411 let decode_substring s ~pos ~len =
413 if len < 0 or pos < 0 or pos > String.length s then
414 invalid_arg "Netencoding.QuotedPrintable.decode_substring";
415 if pos + len > String.length s then
416 invalid_arg "Netencoding.QuotedPrintable.decode_substring";
420 '0'..'9' -> Char.code c - 48
421 | 'A'..'F' -> Char.code c - 55
422 | 'a'..'f' -> Char.code c - 87
424 invalid_arg "Netencoding.QuotedPrintable.decode_substring";
429 match String.unsafe_get s (pos+i) with
432 (* A '=' at EOF is ignored *)
436 match s.[pos+i+1] with
438 (* Official soft break *)
439 if i+2 < len & s.[pos+i+2] = '\n' then
444 (* Inofficial soft break *)
449 "Netencoding.QuotedPrintable.decode_substring";
450 let _ = decode_hex s.[pos+i+1] in
451 let _ = decode_hex s.[pos+i+2] in
454 invalid_arg "Netencoding.QuotedPrintable.decode_substring"
462 let t = String.create l in
468 match String.unsafe_get s !k with
471 (* A '=' at EOF is ignored *)
477 (* Official soft break *)
478 if !k+2 < e & s.[!k+2] = '\n' then
483 (* Inofficial soft break *)
488 "Netencoding.QuotedPrintable.decode_substring";
489 let x1 = decode_hex s.[!k+1] in
490 let x2 = decode_hex s.[!k+2] in
491 t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
495 invalid_arg "Netencoding.QuotedPrintable.decode_substring"
497 String.unsafe_set t !i c;
505 let decode ?(pos=0) ?len s =
506 let l = match len with None -> String.length s - pos | Some x -> x in
507 decode_substring s pos l;;
514 let encode_substring s ~pos ~len =
516 if len < 0 or pos < 0 or pos > String.length s then
517 invalid_arg "Netencoding.Q.encode_substring";
518 if pos + len > String.length s then
519 invalid_arg "Netencoding.Q.encode_substring";
523 match String.unsafe_get s (pos+i) with
524 | ('A'..'Z'|'a'..'z'|'0'..'9') ->
533 let t = String.create l in
536 [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
537 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
543 t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
544 t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
547 for i = 0 to len - 1 do
548 match String.unsafe_get s i with
549 | ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
550 String.unsafe_set t !k c;
560 let encode ?(pos=0) ?len s =
561 let l = match len with None -> String.length s - pos | Some x -> x in
562 encode_substring s pos l;;
566 let decode_substring s ~pos ~len =
568 if len < 0 or pos < 0 or pos > String.length s then
569 invalid_arg "Netencoding.Q.decode_substring";
570 if pos + len > String.length s then
571 invalid_arg "Netencoding.Q.decode_substring";
575 '0'..'9' -> Char.code c - 48
576 | 'A'..'F' -> Char.code c - 55
577 | 'a'..'f' -> Char.code c - 87
579 invalid_arg "Netencoding.Q.decode_substring";
584 match String.unsafe_get s (pos+i) with
587 invalid_arg "Netencoding.Q.decode_substring";
588 let _ = decode_hex s.[pos+i+1] in
589 let _ = decode_hex s.[pos+i+2] in
591 | _ -> (* including '_' *)
598 let t = String.create l in
604 match String.unsafe_get s !k with
607 invalid_arg "Netencoding.Q.decode_substring";
608 let x1 = decode_hex s.[!k+1] in
609 let x2 = decode_hex s.[!k+2] in
610 t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
614 String.unsafe_set t !i ' ';
618 String.unsafe_set t !i c;
626 let decode ?(pos=0) ?len s =
627 let l = match len with None -> String.length s - pos | Some x -> x in
628 decode_substring s pos l ;;
635 [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
636 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
639 (* Converts k to a 2-digit hex string *)
640 let s = String.create 2 in
641 s.[0] <- hex_digits.( (k lsr 4) land 15 );
642 s.[1] <- hex_digits.( k land 15 );
648 ('0'..'9') -> Char.code c - Char.code '0'
649 | ('A'..'F') -> Char.code c - Char.code 'A' + 10
650 | ('a'..'f') -> Char.code c - Char.code 'a' + 10
656 let url_encoding_re =
657 Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
659 let url_decoding_re =
660 Str.regexp "\\+\\|%..\\|%.\\|%";;
664 Str.global_substitute
667 match Str.matched_string r s with
670 let k = Char.code(x.[0]) in
677 let l = String.length s in
678 Str.global_substitute
681 match Str.matched_string r s with
684 let i = Str.match_beginning r in
685 (* Assertion: s.[i] = '%' *)
686 if i+2 >= l then failwith "Cgi.decode";
691 let k1 = of_hex1 c1 in
692 let k2 = of_hex1 c2 in
693 String.make 1 (Char.chr((k1 lsl 4) lor k2))
696 failwith "Cgi.decode"
708 "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
709 let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
716 (* Note: " is new in HTML-4.0, but it has been widely used
818 let ht = Hashtbl.create 50 in
819 List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
820 (* Entities to be decoded, but that must not be encoded: *)
821 Hashtbl.add ht "apos" "'"; (* used in XML documents *)
825 let a = Array.create 256 "" in
826 List.iter (fun (name,value) ->
827 a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
829 a.(i) <- "&#" ^ string_of_int i ^ ";"
832 a.(i) <- "&#" ^ string_of_int i ^ ";"
835 a.(i) <- "&#" ^ string_of_int i ^ ";"
837 for i = 127 to 159 do
838 a.(i) <- "&#" ^ string_of_int i ^ ";"
842 let decode_to_latin1 s =
843 Str.global_substitute
846 let t = Str.matched_string r s in
848 let n = int_of_string(Str.matched_group r 2 s) in
850 String.make 1 (Char.chr n)
856 let name = Str.matched_group r 3 s in
858 Hashtbl.find quick_etable name
863 Not_found -> assert false
867 let encode_from_latin1 s =
868 Str.global_substitute
871 let t = Str.matched_string r s in
872 let i = Char.code (t.[0]) in
880 (* ======================================================================
884 * Revision 1.1 2000/11/17 09:57:27 lpadovan
887 * Revision 1.5 2000/06/25 22:34:43 gerd
888 * Added labels to arguments.
890 * Revision 1.4 2000/06/25 21:15:48 gerd
891 * Checked thread-safety.
893 * Revision 1.3 2000/03/03 17:03:16 gerd
894 * Q encoding: CR and LF are quoted.
896 * Revision 1.2 2000/03/03 01:08:29 gerd
897 * Added Netencoding.Html functions.
899 * Revision 1.1 2000/03/02 01:14:48 gerd