--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+module Str = Netstring_str;;
+
+module Base64 = struct
+ let b64_pattern plus slash =
+ [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
+ 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
+ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
+ 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
+ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;
+
+
+ let rfc_pattern = b64_pattern '+' '/';;
+ let url_pattern = b64_pattern '-' '/';;
+
+ let encode_with_options b64 equal s pos len linelen crlf =
+ (* encode using "base64".
+ * 'b64': The encoding table, created by b64_pattern.
+ * 'equal': The character that should be used instead of '=' in the original
+ * encoding scheme. Pass '=' to get the original encoding scheme.
+ * s, pos, len, linelen: See the interface description of encode_substring.
+ *)
+ assert (Array.length b64 = 64);
+ if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
+ invalid_arg "Netencoding.Base64.encode_with_options";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.Base64.encode_with_options";
+
+ let linelen =
+ (linelen/4) * 4 in
+
+ let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
+ (* l_t: length of the result without additional line endings *)
+
+ let l_t' =
+ if linelen < 4 then
+ l_t
+ else
+ if l_t = 0 then 0 else
+ let n_lines = ((l_t - 1) / linelen) + 1 in
+ l_t + n_lines * (if crlf then 2 else 1)
+ in
+ (* l_t': length of the result with CRLF or LF characters *)
+
+ let t = String.make l_t' equal in
+ let j = ref 0 in
+ let q = ref 0 in
+ for k = 0 to len / 3 - 1 do
+ let p = pos + 3*k in
+ (* p >= pos >= 0: this is evident
+ * p+2 < pos+len <= String.length s:
+ * Because k <= len/3-1
+ * 3*k <= 3*(len/3-1) = len - 3
+ * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
+ * So it is proved that the following unsafe string accesses always
+ * work.
+ *)
+ let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor
+ (Char.code (String.unsafe_get s (p+1)) lsl 8) lor
+ (Char.code (String.unsafe_get s (p+2))) in
+ (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
+ assert(!j + 3 < l_t');
+ String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18));
+ String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
+ String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63));
+ String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63));
+ j := !j + 4;
+ if linelen > 3 then begin
+ q := !q + 4;
+ if !q + 4 > linelen then begin
+ (* The next 4 characters won't fit on the current line. So insert
+ * a line ending.
+ *)
+ if crlf then begin
+ t.[ !j ] <- '\013';
+ t.[ !j+1 ] <- '\010';
+ j := !j + 2;
+ end
+ else begin
+ t.[ !j ] <- '\010';
+ incr j
+ end;
+ q := 0;
+ end;
+ end;
+ done;
+ (* padding if needed: *)
+ let m = len mod 3 in
+ begin
+ match m with
+ 0 -> ()
+ | 1 ->
+ let bits = Char.code (s.[pos + len - 1]) in
+ t.[ !j ] <- b64.( bits lsr 2);
+ t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
+ j := !j + 4;
+ q := !q + 4;
+ | 2 ->
+ let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
+ (Char.code (s.[pos + len - 1])) in
+ t.[ !j ] <- b64.( bits lsr 10);
+ t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f);
+ t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f);
+ j := !j + 4;
+ q := !q + 4;
+ | _ -> assert false
+ end;
+
+ (* If required, add another line end: *)
+
+ if linelen > 3 & !q > 0 then begin
+ if crlf then begin
+ t.[ !j ] <- '\013';
+ t.[ !j+1 ] <- '\010';
+ j := !j + 2;
+ end
+ else begin
+ t.[ !j ] <- '\010';
+ incr j
+ end;
+ end;
+
+ t ;;
+
+
+
+ let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ encode_with_options rfc_pattern '=' s pos l linelength crlf;;
+
+
+ let encode_substring s ~pos ~len ~linelength ~crlf =
+ encode_with_options rfc_pattern '=' s pos len linelength crlf;;
+
+
+ let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ encode_with_options url_pattern '.' s pos l linelength crlf;;
+
+
+ let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
+ if len < 0 or pos < 0 or pos > String.length t then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ if pos + len > String.length t then
+ invalid_arg "Netencoding.Base64.decode_substring";
+
+ (* Compute the number of effective characters l_t in 't';
+ * pad_chars: number of '=' characters at the end of the string.
+ *)
+ let l_t, pad_chars =
+ if p_spaces then begin
+ (* Count all non-whitespace characters: *)
+ let c = ref 0 in
+ let p = ref 0 in
+ for i = pos to pos + len - 1 do
+ match String.unsafe_get t i with
+ (' '|'\t'|'\r'|'\n') -> ()
+ | ('='|'.') as ch ->
+ if ch = '.' & not p_url then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ incr c;
+ incr p;
+ if !p > 2 then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ for j = i+1 to pos + len - 1 do
+ match String.unsafe_get t j with
+ (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
+ | _ ->
+ (* Only another '=' or spaces allowed *)
+ invalid_arg "Netencoding.Base64.decode_substring";
+ done
+ | _ -> incr c
+ done;
+ if !c mod 4 <> 0 then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ !c, !p
+ end
+ else
+ len,
+ ( if len mod 4 <> 0 then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ if len > 0 then (
+ if String.sub t (len - 2) 2 = "==" or
+ (p_url & String.sub t (len - 2) 2 = "..") then 2
+ else
+ if String.sub t (len - 1) 1 = "=" or
+ (p_url & String.sub t (len - 1) 1 = ".") then 1
+ else
+ 0
+ )
+ else 0
+ )
+ in
+
+ let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *)
+ let s = String.create l_s in
+
+ let decode_char c =
+ match c with
+ 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *)
+ | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *)
+ | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *)
+ | '+' -> 62
+ | '-' -> if not p_url then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ 62
+ | '/' -> 63
+ | _ -> invalid_arg "Netencoding.Base64.decode_substring";
+ in
+
+ (* Decode all but the last quartet: *)
+
+ let cursor = ref pos in
+ let rec next_char() =
+ match t.[ !cursor ] with
+ (' '|'\t'|'\r'|'\n') ->
+ if p_spaces then (incr cursor; next_char())
+ else invalid_arg "Netencoding.Base64.decode_substring"
+ | c ->
+ incr cursor; c
+ in
+
+ if p_spaces then begin
+ for k = 0 to l_t / 4 - 2 do
+ let q = 3*k in
+ let c0 = next_char() in
+ let c1 = next_char() in
+ let c2 = next_char() in
+ let c3 = next_char() in
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let n3 = decode_char c3 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+ String.unsafe_set s q (Char.chr x0);
+ String.unsafe_set s (q+1) (Char.chr x1);
+ String.unsafe_set s (q+2) (Char.chr x2);
+ done;
+ end
+ else begin
+ (* Much faster: *)
+ for k = 0 to l_t / 4 - 2 do
+ let p = pos + 4*k in
+ let q = 3*k in
+ let c0 = String.unsafe_get t p in
+ let c1 = String.unsafe_get t (p + 1) in
+ let c2 = String.unsafe_get t (p + 2) in
+ let c3 = String.unsafe_get t (p + 3) in
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let n3 = decode_char c3 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+ String.unsafe_set s q (Char.chr x0);
+ String.unsafe_set s (q+1) (Char.chr x1);
+ String.unsafe_set s (q+2) (Char.chr x2);
+ done;
+ cursor := pos + l_t - 4;
+ end;
+
+ (* Decode the last quartet: *)
+
+ if l_t > 0 then begin
+ let q = 3*(l_t / 4 - 1) in
+ let c0 = next_char() in
+ let c1 = next_char() in
+ let c2 = next_char() in
+ let c3 = next_char() in
+
+ if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ s.[ q ] <- Char.chr x0;
+ end
+ else
+ if (c3 = '=') or (p_url & c3 = '.') then begin
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ s.[ q ] <- Char.chr x0;
+ s.[ q+1 ] <- Char.chr x1;
+ end
+ else begin
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let n3 = decode_char c3 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+ s.[ q ] <- Char.chr x0;
+ s.[ q+1 ] <- Char.chr x1;
+ s.[ q+2 ] <- Char.chr x2;
+ end
+
+ end;
+
+ s ;;
+
+
+
+ let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ decode_substring s pos l url_variant accept_spaces;;
+
+ let decode_ignore_spaces s =
+ decode_substring s 0 (String.length s) true true;;
+
+
+end
+
+
+
+module QuotedPrintable = struct
+
+ let encode_substring s ~pos ~len =
+
+ if len < 0 or pos < 0 or pos > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ ('\r'|'\n') ->
+ count (n+1) (i+1)
+ | ('\000'..'\031'|'\127'..'\255'|
+ '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
+ count (n+3) (i+1)
+ | ' ' ->
+ (* Protect spaces only if they occur at the end of a line *)
+ if i+1 < len then
+ match s.[pos+i+1] with
+ ('\r'|'\n') ->
+ count (n+3) (i+1)
+ | _ ->
+ count (n+1) (i+1)
+ else
+ count (n+3) (i+1)
+ | _ ->
+ count (n+1) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+
+ let hexdigit =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
+
+ let k = ref 0 in
+
+ let add_quoted c =
+ t.[ !k ] <- '=';
+ t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+ t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
+ in
+
+ for i = 0 to len - 1 do
+ match String.unsafe_get s i with
+ ('\r'|'\n') as c ->
+ String.unsafe_set t !k c;
+ incr k
+ | ('\000'..'\031'|'\127'..'\255'|
+ '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
+ add_quoted c;
+ k := !k + 3
+ | ' ' ->
+ (* Protect spaces only if they occur at the end of a line *)
+ if i+1 < len then
+ match s.[pos+i+1] with
+ ('\r'|'\n') ->
+ add_quoted ' ';
+ k := !k + 3;
+ | _ ->
+ String.unsafe_set t !k ' ';
+ incr k
+ else begin
+ add_quoted ' ';
+ k := !k + 3;
+ end
+ | c ->
+ String.unsafe_set t !k c;
+ incr k
+ done;
+
+ t ;;
+
+
+ let encode ?(pos=0) ?len s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ encode_substring s pos l;;
+
+
+
+ let decode_substring s ~pos ~len =
+
+ if len < 0 or pos < 0 or pos > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+
+ let decode_hex c =
+ match c with
+ '0'..'9' -> Char.code c - 48
+ | 'A'..'F' -> Char.code c - 55
+ | 'a'..'f' -> Char.code c - 87
+ | _ ->
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+ in
+
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ '=' ->
+ if i+1 = len then
+ (* A '=' at EOF is ignored *)
+ count n (i+1)
+ else
+ if i+1 < len then
+ match s.[pos+i+1] with
+ '\r' ->
+ (* Official soft break *)
+ if i+2 < len & s.[pos+i+2] = '\n' then
+ count n (i+3)
+ else
+ count n (i+2)
+ | '\n' ->
+ (* Inofficial soft break *)
+ count n (i+2)
+ | _ ->
+ if i+2 >= len then
+ invalid_arg
+ "Netencoding.QuotedPrintable.decode_substring";
+ let _ = decode_hex s.[pos+i+1] in
+ let _ = decode_hex s.[pos+i+2] in
+ count (n+1) (i+3)
+ else
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring"
+ | _ ->
+ count (n+1) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+ let k = ref pos in
+ let e = pos + len in
+ let i = ref 0 in
+
+ while !i < l do
+ match String.unsafe_get s !k with
+ '=' ->
+ if !k+1 = e then
+ (* A '=' at EOF is ignored *)
+ ()
+ else
+ if !k+1 < e then
+ match s.[!k+1] with
+ '\r' ->
+ (* Official soft break *)
+ if !k+2 < e & s.[!k+2] = '\n' then
+ k := !k + 3
+ else
+ k := !k + 2
+ | '\n' ->
+ (* Inofficial soft break *)
+ k := !k + 2
+ | _ ->
+ if !k+2 >= e then
+ invalid_arg
+ "Netencoding.QuotedPrintable.decode_substring";
+ let x1 = decode_hex s.[!k+1] in
+ let x2 = decode_hex s.[!k+2] in
+ t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
+ k := !k + 3;
+ incr i
+ else
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring"
+ | c ->
+ String.unsafe_set t !i c;
+ incr k;
+ incr i
+ done;
+
+ t ;;
+
+
+ let decode ?(pos=0) ?len s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ decode_substring s pos l;;
+
+end
+
+
+module Q = struct
+
+ let encode_substring s ~pos ~len =
+
+ if len < 0 or pos < 0 or pos > String.length s then
+ invalid_arg "Netencoding.Q.encode_substring";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.Q.encode_substring";
+
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ | ('A'..'Z'|'a'..'z'|'0'..'9') ->
+ count (n+1) (i+1)
+ | _ ->
+ count (n+3) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+
+ let hexdigit =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
+
+ let k = ref 0 in
+
+ let add_quoted c =
+ t.[ !k ] <- '=';
+ t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+ t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
+ in
+
+ for i = 0 to len - 1 do
+ match String.unsafe_get s i with
+ | ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
+ String.unsafe_set t !k c;
+ incr k
+ | c ->
+ add_quoted c;
+ k := !k + 3
+ done;
+
+ t ;;
+
+
+ let encode ?(pos=0) ?len s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ encode_substring s pos l;;
+
+
+
+ let decode_substring s ~pos ~len =
+
+ if len < 0 or pos < 0 or pos > String.length s then
+ invalid_arg "Netencoding.Q.decode_substring";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.Q.decode_substring";
+
+ let decode_hex c =
+ match c with
+ '0'..'9' -> Char.code c - 48
+ | 'A'..'F' -> Char.code c - 55
+ | 'a'..'f' -> Char.code c - 87
+ | _ ->
+ invalid_arg "Netencoding.Q.decode_substring";
+ in
+
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ '=' ->
+ if i+2 >= len then
+ invalid_arg "Netencoding.Q.decode_substring";
+ let _ = decode_hex s.[pos+i+1] in
+ let _ = decode_hex s.[pos+i+2] in
+ count (n+1) (i+3)
+ | _ -> (* including '_' *)
+ count (n+1) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+ let k = ref pos in
+ let e = pos + len in
+ let i = ref 0 in
+
+ while !i < l do
+ match String.unsafe_get s !k with
+ '=' ->
+ if !k+2 >= e then
+ invalid_arg "Netencoding.Q.decode_substring";
+ let x1 = decode_hex s.[!k+1] in
+ let x2 = decode_hex s.[!k+2] in
+ t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
+ k := !k + 3;
+ incr i
+ | '_' ->
+ String.unsafe_set t !i ' ';
+ incr k;
+ incr i
+ | c ->
+ String.unsafe_set t !i c;
+ incr k;
+ incr i
+ done;
+
+ t ;;
+
+
+ let decode ?(pos=0) ?len s =
+ let l = match len with None -> String.length s - pos | Some x -> x in
+ decode_substring s pos l ;;
+
+end
+
+
+module Url = struct
+ let hex_digits =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
+
+ let to_hex2 k =
+ (* Converts k to a 2-digit hex string *)
+ let s = String.create 2 in
+ s.[0] <- hex_digits.( (k lsr 4) land 15 );
+ s.[1] <- hex_digits.( k land 15 );
+ s ;;
+
+
+ let of_hex1 c =
+ match c with
+ ('0'..'9') -> Char.code c - Char.code '0'
+ | ('A'..'F') -> Char.code c - Char.code 'A' + 10
+ | ('a'..'f') -> Char.code c - Char.code 'a' + 10
+ | _ ->
+ raise Not_found ;;
+
+
+
+ let url_encoding_re =
+ Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
+
+ let url_decoding_re =
+ Str.regexp "\\+\\|%..\\|%.\\|%";;
+
+
+ let encode s =
+ Str.global_substitute
+ url_encoding_re
+ (fun r _ ->
+ match Str.matched_string r s with
+ " " -> "+"
+ | x ->
+ let k = Char.code(x.[0]) in
+ "%" ^ to_hex2 k
+ )
+ s ;;
+
+
+ let decode s =
+ let l = String.length s in
+ Str.global_substitute
+ url_decoding_re
+ (fun r _ ->
+ match Str.matched_string r s with
+ | "+" -> " "
+ | _ ->
+ let i = Str.match_beginning r in
+ (* Assertion: s.[i] = '%' *)
+ if i+2 >= l then failwith "Cgi.decode";
+ let c1 = s.[i+1] in
+ let c2 = s.[i+2] in
+ begin
+ try
+ let k1 = of_hex1 c1 in
+ let k2 = of_hex1 c2 in
+ String.make 1 (Char.chr((k1 lsl 4) lor k2))
+ with
+ Not_found ->
+ failwith "Cgi.decode"
+ end
+ )
+ s ;;
+
+end
+
+
+module Html = struct
+
+ let eref_re =
+ Str.regexp
+ "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
+ let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
+
+ let etable =
+ [ "lt", "<";
+ "gt", ">";
+ "amp", "&";
+ "quot", "\"";
+ (* Note: " is new in HTML-4.0, but it has been widely used
+ * much earlier.
+ *)
+ "nbsp", "\160";
+ "iexcl", "\161";
+ "cent", "\162";
+ "pound", "\163";
+ "curren", "\164";
+ "yen", "\165";
+ "brvbar", "\166";
+ "sect", "\167";
+ "uml", "\168";
+ "copy", "\169";
+ "ordf", "\170";
+ "laquo", "\171";
+ "not", "\172";
+ "shy", "\173";
+ "reg", "\174";
+ "macr", "\175";
+ "deg", "\176";
+ "plusmn", "\177";
+ "sup2", "\178";
+ "sup3", "\179";
+ "acute", "\180";
+ "micro", "\181";
+ "para", "\182";
+ "middot", "\183";
+ "cedil", "\184";
+ "sup1", "\185";
+ "ordm", "\186";
+ "raquo", "\187";
+ "frac14", "\188";
+ "frac12", "\189";
+ "frac34", "\190";
+ "iquest", "\191";
+ "Agrave", "\192";
+ "Aacute", "\193";
+ "Acirc", "\194";
+ "Atilde", "\195";
+ "Auml", "\196";
+ "Aring", "\197";
+ "AElig", "\198";
+ "Ccedil", "\199";
+ "Egrave", "\200";
+ "Eacute", "\201";
+ "Ecirc", "\202";
+ "Euml", "\203";
+ "Igrave", "\204";
+ "Iacute", "\205";
+ "Icirc", "\206";
+ "Iuml", "\207";
+ "ETH", "\208";
+ "Ntilde", "\209";
+ "Ograve", "\210";
+ "Oacute", "\211";
+ "Ocirc", "\212";
+ "Otilde", "\213";
+ "Ouml", "\214";
+ "times", "\215";
+ "Oslash", "\216";
+ "Ugrave", "\217";
+ "Uacute", "\218";
+ "Ucirc", "\219";
+ "Uuml", "\220";
+ "Yacute", "\221";
+ "THORN", "\222";
+ "szlig", "\223";
+ "agrave", "\224";
+ "aacute", "\225";
+ "acirc", "\226";
+ "atilde", "\227";
+ "auml", "\228";
+ "aring", "\229";
+ "aelig", "\230";
+ "ccedil", "\231";
+ "egrave", "\232";
+ "eacute", "\233";
+ "ecirc", "\234";
+ "euml", "\235";
+ "igrave", "\236";
+ "iacute", "\237";
+ "icirc", "\238";
+ "iuml", "\239";
+ "eth", "\240";
+ "ntilde", "\241";
+ "ograve", "\242";
+ "oacute", "\243";
+ "ocirc", "\244";
+ "otilde", "\245";
+ "ouml", "\246";
+ "divide", "\247";
+ "oslash", "\248";
+ "ugrave", "\249";
+ "uacute", "\250";
+ "ucirc", "\251";
+ "uuml", "\252";
+ "yacute", "\253";
+ "thorn", "\254";
+ "yuml", "\255";
+ ] ;;
+
+ let quick_etable =
+ let ht = Hashtbl.create 50 in
+ List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
+ (* Entities to be decoded, but that must not be encoded: *)
+ Hashtbl.add ht "apos" "'"; (* used in XML documents *)
+ ht ;;
+
+ let rev_etable =
+ let a = Array.create 256 "" in
+ List.iter (fun (name,value) ->
+ a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
+ for i = 0 to 8 do
+ a.(i) <- "&#" ^ string_of_int i ^ ";"
+ done;
+ for i = 11 to 12 do
+ a.(i) <- "&#" ^ string_of_int i ^ ";"
+ done;
+ for i = 14 to 31 do
+ a.(i) <- "&#" ^ string_of_int i ^ ";"
+ done;
+ for i = 127 to 159 do
+ a.(i) <- "&#" ^ string_of_int i ^ ";"
+ done;
+ a ;;
+
+ let decode_to_latin1 s =
+ Str.global_substitute
+ eref_re
+ (fun r _ ->
+ let t = Str.matched_string r s in
+ try
+ let n = int_of_string(Str.matched_group r 2 s) in
+ if n < 256 then
+ String.make 1 (Char.chr n)
+ else
+ t
+ with
+ Not_found ->
+ try
+ let name = Str.matched_group r 3 s in
+ try
+ Hashtbl.find quick_etable name
+ with
+ Not_found ->
+ t
+ with
+ Not_found -> assert false
+ )
+ s ;;
+
+ let encode_from_latin1 s =
+ Str.global_substitute
+ unsafe_re
+ (fun r _ ->
+ let t = Str.matched_string r s in
+ let i = Char.code (t.[0]) in
+ rev_etable.(i)
+ )
+ s ;;
+end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.4 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.3 2000/03/03 17:03:16 gerd
+ * Q encoding: CR and LF are quoted.
+ *
+ * Revision 1.2 2000/03/03 01:08:29 gerd
+ * Added Netencoding.Html functions.
+ *
+ * Revision 1.1 2000/03/02 01:14:48 gerd
+ * Initial revision.
+ *
+ *
+ *)