+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-module Str = Netstring_str;;
-
-module Base64 = struct
- let b64_pattern plus slash =
- [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
- 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
- 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
- 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
- '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;
-
-
- let rfc_pattern = b64_pattern '+' '/';;
- let url_pattern = b64_pattern '-' '/';;
-
- let encode_with_options b64 equal s pos len linelen crlf =
- (* encode using "base64".
- * 'b64': The encoding table, created by b64_pattern.
- * 'equal': The character that should be used instead of '=' in the original
- * encoding scheme. Pass '=' to get the original encoding scheme.
- * s, pos, len, linelen: See the interface description of encode_substring.
- *)
- assert (Array.length b64 = 64);
- if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
- invalid_arg "Netencoding.Base64.encode_with_options";
- if pos + len > String.length s then
- invalid_arg "Netencoding.Base64.encode_with_options";
-
- let linelen =
- (linelen/4) * 4 in
-
- let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
- (* l_t: length of the result without additional line endings *)
-
- let l_t' =
- if linelen < 4 then
- l_t
- else
- if l_t = 0 then 0 else
- let n_lines = ((l_t - 1) / linelen) + 1 in
- l_t + n_lines * (if crlf then 2 else 1)
- in
- (* l_t': length of the result with CRLF or LF characters *)
-
- let t = String.make l_t' equal in
- let j = ref 0 in
- let q = ref 0 in
- for k = 0 to len / 3 - 1 do
- let p = pos + 3*k in
- (* p >= pos >= 0: this is evident
- * p+2 < pos+len <= String.length s:
- * Because k <= len/3-1
- * 3*k <= 3*(len/3-1) = len - 3
- * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
- * So it is proved that the following unsafe string accesses always
- * work.
- *)
- let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor
- (Char.code (String.unsafe_get s (p+1)) lsl 8) lor
- (Char.code (String.unsafe_get s (p+2))) in
- (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
- assert(!j + 3 < l_t');
- String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18));
- String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
- String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63));
- String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63));
- j := !j + 4;
- if linelen > 3 then begin
- q := !q + 4;
- if !q + 4 > linelen then begin
- (* The next 4 characters won't fit on the current line. So insert
- * a line ending.
- *)
- if crlf then begin
- t.[ !j ] <- '\013';
- t.[ !j+1 ] <- '\010';
- j := !j + 2;
- end
- else begin
- t.[ !j ] <- '\010';
- incr j
- end;
- q := 0;
- end;
- end;
- done;
- (* padding if needed: *)
- let m = len mod 3 in
- begin
- match m with
- 0 -> ()
- | 1 ->
- let bits = Char.code (s.[pos + len - 1]) in
- t.[ !j ] <- b64.( bits lsr 2);
- t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
- j := !j + 4;
- q := !q + 4;
- | 2 ->
- let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
- (Char.code (s.[pos + len - 1])) in
- t.[ !j ] <- b64.( bits lsr 10);
- t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f);
- t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f);
- j := !j + 4;
- q := !q + 4;
- | _ -> assert false
- end;
-
- (* If required, add another line end: *)
-
- if linelen > 3 & !q > 0 then begin
- if crlf then begin
- t.[ !j ] <- '\013';
- t.[ !j+1 ] <- '\010';
- j := !j + 2;
- end
- else begin
- t.[ !j ] <- '\010';
- incr j
- end;
- end;
-
- t ;;
-
-
-
- let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_with_options rfc_pattern '=' s pos l linelength crlf;;
-
-
- let encode_substring s ~pos ~len ~linelength ~crlf =
- encode_with_options rfc_pattern '=' s pos len linelength crlf;;
-
-
- let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_with_options url_pattern '.' s pos l linelength crlf;;
-
-
- let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
- if len < 0 or pos < 0 or pos > String.length t then
- invalid_arg "Netencoding.Base64.decode_substring";
- if pos + len > String.length t then
- invalid_arg "Netencoding.Base64.decode_substring";
-
- (* Compute the number of effective characters l_t in 't';
- * pad_chars: number of '=' characters at the end of the string.
- *)
- let l_t, pad_chars =
- if p_spaces then begin
- (* Count all non-whitespace characters: *)
- let c = ref 0 in
- let p = ref 0 in
- for i = pos to pos + len - 1 do
- match String.unsafe_get t i with
- (' '|'\t'|'\r'|'\n') -> ()
- | ('='|'.') as ch ->
- if ch = '.' & not p_url then
- invalid_arg "Netencoding.Base64.decode_substring";
- incr c;
- incr p;
- if !p > 2 then
- invalid_arg "Netencoding.Base64.decode_substring";
- for j = i+1 to pos + len - 1 do
- match String.unsafe_get t j with
- (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
- | _ ->
- (* Only another '=' or spaces allowed *)
- invalid_arg "Netencoding.Base64.decode_substring";
- done
- | _ -> incr c
- done;
- if !c mod 4 <> 0 then
- invalid_arg "Netencoding.Base64.decode_substring";
- !c, !p
- end
- else
- len,
- ( if len mod 4 <> 0 then
- invalid_arg "Netencoding.Base64.decode_substring";
- if len > 0 then (
- if String.sub t (len - 2) 2 = "==" or
- (p_url & String.sub t (len - 2) 2 = "..") then 2
- else
- if String.sub t (len - 1) 1 = "=" or
- (p_url & String.sub t (len - 1) 1 = ".") then 1
- else
- 0
- )
- else 0
- )
- in
-
- let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *)
- let s = String.create l_s in
-
- let decode_char c =
- match c with
- 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *)
- | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *)
- | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *)
- | '+' -> 62
- | '-' -> if not p_url then
- invalid_arg "Netencoding.Base64.decode_substring";
- 62
- | '/' -> 63
- | _ -> invalid_arg "Netencoding.Base64.decode_substring";
- in
-
- (* Decode all but the last quartet: *)
-
- let cursor = ref pos in
- let rec next_char() =
- match t.[ !cursor ] with
- (' '|'\t'|'\r'|'\n') ->
- if p_spaces then (incr cursor; next_char())
- else invalid_arg "Netencoding.Base64.decode_substring"
- | c ->
- incr cursor; c
- in
-
- if p_spaces then begin
- for k = 0 to l_t / 4 - 2 do
- let q = 3*k in
- let c0 = next_char() in
- let c1 = next_char() in
- let c2 = next_char() in
- let c3 = next_char() in
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let n3 = decode_char c3 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
- String.unsafe_set s q (Char.chr x0);
- String.unsafe_set s (q+1) (Char.chr x1);
- String.unsafe_set s (q+2) (Char.chr x2);
- done;
- end
- else begin
- (* Much faster: *)
- for k = 0 to l_t / 4 - 2 do
- let p = pos + 4*k in
- let q = 3*k in
- let c0 = String.unsafe_get t p in
- let c1 = String.unsafe_get t (p + 1) in
- let c2 = String.unsafe_get t (p + 2) in
- let c3 = String.unsafe_get t (p + 3) in
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let n3 = decode_char c3 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
- String.unsafe_set s q (Char.chr x0);
- String.unsafe_set s (q+1) (Char.chr x1);
- String.unsafe_set s (q+2) (Char.chr x2);
- done;
- cursor := pos + l_t - 4;
- end;
-
- (* Decode the last quartet: *)
-
- if l_t > 0 then begin
- let q = 3*(l_t / 4 - 1) in
- let c0 = next_char() in
- let c1 = next_char() in
- let c2 = next_char() in
- let c3 = next_char() in
-
- if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- s.[ q ] <- Char.chr x0;
- end
- else
- if (c3 = '=') or (p_url & c3 = '.') then begin
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- s.[ q ] <- Char.chr x0;
- s.[ q+1 ] <- Char.chr x1;
- end
- else begin
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let n3 = decode_char c3 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
- s.[ q ] <- Char.chr x0;
- s.[ q+1 ] <- Char.chr x1;
- s.[ q+2 ] <- Char.chr x2;
- end
-
- end;
-
- s ;;
-
-
-
- let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- decode_substring s pos l url_variant accept_spaces;;
-
- let decode_ignore_spaces s =
- decode_substring s 0 (String.length s) true true;;
-
-
-end
-
-
-
-module QuotedPrintable = struct
-
- let encode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.encode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.encode_substring";
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- ('\r'|'\n') ->
- count (n+1) (i+1)
- | ('\000'..'\031'|'\127'..'\255'|
- '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
- count (n+3) (i+1)
- | ' ' ->
- (* Protect spaces only if they occur at the end of a line *)
- if i+1 < len then
- match s.[pos+i+1] with
- ('\r'|'\n') ->
- count (n+3) (i+1)
- | _ ->
- count (n+1) (i+1)
- else
- count (n+3) (i+1)
- | _ ->
- count (n+1) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
-
- let hexdigit =
- [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
- '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
-
- let k = ref 0 in
-
- let add_quoted c =
- t.[ !k ] <- '=';
- t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
- t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
- in
-
- for i = 0 to len - 1 do
- match String.unsafe_get s i with
- ('\r'|'\n') as c ->
- String.unsafe_set t !k c;
- incr k
- | ('\000'..'\031'|'\127'..'\255'|
- '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
- add_quoted c;
- k := !k + 3
- | ' ' ->
- (* Protect spaces only if they occur at the end of a line *)
- if i+1 < len then
- match s.[pos+i+1] with
- ('\r'|'\n') ->
- add_quoted ' ';
- k := !k + 3;
- | _ ->
- String.unsafe_set t !k ' ';
- incr k
- else begin
- add_quoted ' ';
- k := !k + 3;
- end
- | c ->
- String.unsafe_set t !k c;
- incr k
- done;
-
- t ;;
-
-
- let encode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_substring s pos l;;
-
-
-
- let decode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.decode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.decode_substring";
-
- let decode_hex c =
- match c with
- '0'..'9' -> Char.code c - 48
- | 'A'..'F' -> Char.code c - 55
- | 'a'..'f' -> Char.code c - 87
- | _ ->
- invalid_arg "Netencoding.QuotedPrintable.decode_substring";
- in
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- '=' ->
- if i+1 = len then
- (* A '=' at EOF is ignored *)
- count n (i+1)
- else
- if i+1 < len then
- match s.[pos+i+1] with
- '\r' ->
- (* Official soft break *)
- if i+2 < len & s.[pos+i+2] = '\n' then
- count n (i+3)
- else
- count n (i+2)
- | '\n' ->
- (* Inofficial soft break *)
- count n (i+2)
- | _ ->
- if i+2 >= len then
- invalid_arg
- "Netencoding.QuotedPrintable.decode_substring";
- let _ = decode_hex s.[pos+i+1] in
- let _ = decode_hex s.[pos+i+2] in
- count (n+1) (i+3)
- else
- invalid_arg "Netencoding.QuotedPrintable.decode_substring"
- | _ ->
- count (n+1) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
- let k = ref pos in
- let e = pos + len in
- let i = ref 0 in
-
- while !i < l do
- match String.unsafe_get s !k with
- '=' ->
- if !k+1 = e then
- (* A '=' at EOF is ignored *)
- ()
- else
- if !k+1 < e then
- match s.[!k+1] with
- '\r' ->
- (* Official soft break *)
- if !k+2 < e & s.[!k+2] = '\n' then
- k := !k + 3
- else
- k := !k + 2
- | '\n' ->
- (* Inofficial soft break *)
- k := !k + 2
- | _ ->
- if !k+2 >= e then
- invalid_arg
- "Netencoding.QuotedPrintable.decode_substring";
- let x1 = decode_hex s.[!k+1] in
- let x2 = decode_hex s.[!k+2] in
- t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
- k := !k + 3;
- incr i
- else
- invalid_arg "Netencoding.QuotedPrintable.decode_substring"
- | c ->
- String.unsafe_set t !i c;
- incr k;
- incr i
- done;
-
- t ;;
-
-
- let decode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- decode_substring s pos l;;
-
-end
-
-
-module Q = struct
-
- let encode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.Q.encode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.Q.encode_substring";
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- | ('A'..'Z'|'a'..'z'|'0'..'9') ->
- count (n+1) (i+1)
- | _ ->
- count (n+3) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
-
- let hexdigit =
- [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
- '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
-
- let k = ref 0 in
-
- let add_quoted c =
- t.[ !k ] <- '=';
- t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
- t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
- in
-
- for i = 0 to len - 1 do
- match String.unsafe_get s i with
- | ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
- String.unsafe_set t !k c;
- incr k
- | c ->
- add_quoted c;
- k := !k + 3
- done;
-
- t ;;
-
-
- let encode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_substring s pos l;;
-
-
-
- let decode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.Q.decode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.Q.decode_substring";
-
- let decode_hex c =
- match c with
- '0'..'9' -> Char.code c - 48
- | 'A'..'F' -> Char.code c - 55
- | 'a'..'f' -> Char.code c - 87
- | _ ->
- invalid_arg "Netencoding.Q.decode_substring";
- in
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- '=' ->
- if i+2 >= len then
- invalid_arg "Netencoding.Q.decode_substring";
- let _ = decode_hex s.[pos+i+1] in
- let _ = decode_hex s.[pos+i+2] in
- count (n+1) (i+3)
- | _ -> (* including '_' *)
- count (n+1) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
- let k = ref pos in
- let e = pos + len in
- let i = ref 0 in
-
- while !i < l do
- match String.unsafe_get s !k with
- '=' ->
- if !k+2 >= e then
- invalid_arg "Netencoding.Q.decode_substring";
- let x1 = decode_hex s.[!k+1] in
- let x2 = decode_hex s.[!k+2] in
- t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
- k := !k + 3;
- incr i
- | '_' ->
- String.unsafe_set t !i ' ';
- incr k;
- incr i
- | c ->
- String.unsafe_set t !i c;
- incr k;
- incr i
- done;
-
- t ;;
-
-
- let decode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- decode_substring s pos l ;;
-
-end
-
-
-module Url = struct
- let hex_digits =
- [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
- '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
-
- let to_hex2 k =
- (* Converts k to a 2-digit hex string *)
- let s = String.create 2 in
- s.[0] <- hex_digits.( (k lsr 4) land 15 );
- s.[1] <- hex_digits.( k land 15 );
- s ;;
-
-
- let of_hex1 c =
- match c with
- ('0'..'9') -> Char.code c - Char.code '0'
- | ('A'..'F') -> Char.code c - Char.code 'A' + 10
- | ('a'..'f') -> Char.code c - Char.code 'a' + 10
- | _ ->
- raise Not_found ;;
-
-
-
- let url_encoding_re =
- Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
-
- let url_decoding_re =
- Str.regexp "\\+\\|%..\\|%.\\|%";;
-
-
- let encode s =
- Str.global_substitute
- url_encoding_re
- (fun r _ ->
- match Str.matched_string r s with
- " " -> "+"
- | x ->
- let k = Char.code(x.[0]) in
- "%" ^ to_hex2 k
- )
- s ;;
-
-
- let decode s =
- let l = String.length s in
- Str.global_substitute
- url_decoding_re
- (fun r _ ->
- match Str.matched_string r s with
- | "+" -> " "
- | _ ->
- let i = Str.match_beginning r in
- (* Assertion: s.[i] = '%' *)
- if i+2 >= l then failwith "Cgi.decode";
- let c1 = s.[i+1] in
- let c2 = s.[i+2] in
- begin
- try
- let k1 = of_hex1 c1 in
- let k2 = of_hex1 c2 in
- String.make 1 (Char.chr((k1 lsl 4) lor k2))
- with
- Not_found ->
- failwith "Cgi.decode"
- end
- )
- s ;;
-
-end
-
-
-module Html = struct
-
- let eref_re =
- Str.regexp
- "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
- let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
-
- let etable =
- [ "lt", "<";
- "gt", ">";
- "amp", "&";
- "quot", "\"";
- (* Note: " is new in HTML-4.0, but it has been widely used
- * much earlier.
- *)
- "nbsp", "\160";
- "iexcl", "\161";
- "cent", "\162";
- "pound", "\163";
- "curren", "\164";
- "yen", "\165";
- "brvbar", "\166";
- "sect", "\167";
- "uml", "\168";
- "copy", "\169";
- "ordf", "\170";
- "laquo", "\171";
- "not", "\172";
- "shy", "\173";
- "reg", "\174";
- "macr", "\175";
- "deg", "\176";
- "plusmn", "\177";
- "sup2", "\178";
- "sup3", "\179";
- "acute", "\180";
- "micro", "\181";
- "para", "\182";
- "middot", "\183";
- "cedil", "\184";
- "sup1", "\185";
- "ordm", "\186";
- "raquo", "\187";
- "frac14", "\188";
- "frac12", "\189";
- "frac34", "\190";
- "iquest", "\191";
- "Agrave", "\192";
- "Aacute", "\193";
- "Acirc", "\194";
- "Atilde", "\195";
- "Auml", "\196";
- "Aring", "\197";
- "AElig", "\198";
- "Ccedil", "\199";
- "Egrave", "\200";
- "Eacute", "\201";
- "Ecirc", "\202";
- "Euml", "\203";
- "Igrave", "\204";
- "Iacute", "\205";
- "Icirc", "\206";
- "Iuml", "\207";
- "ETH", "\208";
- "Ntilde", "\209";
- "Ograve", "\210";
- "Oacute", "\211";
- "Ocirc", "\212";
- "Otilde", "\213";
- "Ouml", "\214";
- "times", "\215";
- "Oslash", "\216";
- "Ugrave", "\217";
- "Uacute", "\218";
- "Ucirc", "\219";
- "Uuml", "\220";
- "Yacute", "\221";
- "THORN", "\222";
- "szlig", "\223";
- "agrave", "\224";
- "aacute", "\225";
- "acirc", "\226";
- "atilde", "\227";
- "auml", "\228";
- "aring", "\229";
- "aelig", "\230";
- "ccedil", "\231";
- "egrave", "\232";
- "eacute", "\233";
- "ecirc", "\234";
- "euml", "\235";
- "igrave", "\236";
- "iacute", "\237";
- "icirc", "\238";
- "iuml", "\239";
- "eth", "\240";
- "ntilde", "\241";
- "ograve", "\242";
- "oacute", "\243";
- "ocirc", "\244";
- "otilde", "\245";
- "ouml", "\246";
- "divide", "\247";
- "oslash", "\248";
- "ugrave", "\249";
- "uacute", "\250";
- "ucirc", "\251";
- "uuml", "\252";
- "yacute", "\253";
- "thorn", "\254";
- "yuml", "\255";
- ] ;;
-
- let quick_etable =
- let ht = Hashtbl.create 50 in
- List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
- (* Entities to be decoded, but that must not be encoded: *)
- Hashtbl.add ht "apos" "'"; (* used in XML documents *)
- ht ;;
-
- let rev_etable =
- let a = Array.create 256 "" in
- List.iter (fun (name,value) ->
- a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
- for i = 0 to 8 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- for i = 11 to 12 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- for i = 14 to 31 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- for i = 127 to 159 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- a ;;
-
- let decode_to_latin1 s =
- Str.global_substitute
- eref_re
- (fun r _ ->
- let t = Str.matched_string r s in
- try
- let n = int_of_string(Str.matched_group r 2 s) in
- if n < 256 then
- String.make 1 (Char.chr n)
- else
- t
- with
- Not_found ->
- try
- let name = Str.matched_group r 3 s in
- try
- Hashtbl.find quick_etable name
- with
- Not_found ->
- t
- with
- Not_found -> assert false
- )
- s ;;
-
- let encode_from_latin1 s =
- Str.global_substitute
- unsafe_re
- (fun r _ ->
- let t = Str.matched_string r s in
- let i = Char.code (t.[0]) in
- rev_etable.(i)
- )
- s ;;
-end
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.4 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.3 2000/03/03 17:03:16 gerd
- * Q encoding: CR and LF are quoted.
- *
- * Revision 1.2 2000/03/03 01:08:29 gerd
- * Added Netencoding.Html functions.
- *
- * Revision 1.1 2000/03/02 01:14:48 gerd
- * Initial revision.
- *
- *
- *)