+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-module Str = Netstring_str;;
-
-let cr_or_lf_re = Str.regexp "[\013\n]";;
-
-let trim_right_spaces_re =
- Str.regexp "[ \t]+$";;
-
-let trim_left_spaces_re =
- Str.regexp "^[ \t]+";;
-
-let header_re =
- Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
-
-let empty_line_re =
- Str.regexp "\013?\n";;
-
-let end_of_header_re =
- Str.regexp "\n\013?\n";;
-
-
-let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
- let rec parse_header i l =
- match Str.string_partial_match header_re parstr i with
- Some r ->
- let i' = Str.match_end r in
- if i' > i1 then
- failwith "Mimestring.scan_header";
- let name = String.lowercase(Str.matched_group r 1 parstr) in
- let value_with_crlf =
- Str.matched_group r 2 parstr in
- let value =
- if unfold then begin
- let value_with_rspaces =
- Str.global_replace cr_or_lf_re "" value_with_crlf in
- let value_with_lspaces =
- Str.global_replace trim_right_spaces_re "" value_with_rspaces in
- Str.global_replace trim_left_spaces_re "" value_with_lspaces
- end
- else value_with_crlf
- in
- parse_header i' ( (name,value) :: l)
- | None ->
- (* The header must end with an empty line *)
- begin match Str.string_partial_match empty_line_re parstr i with
- Some r' ->
- List.rev l, Str.match_end r'
- | None ->
- failwith "Mimestring.scan_header"
- end
- in
- parse_header i0 []
-;;
-
-type s_token =
- Atom of string
- | EncodedWord of (string * string * string)
- | QString of string
- | Control of char
- | Special of char
- | DomainLiteral of string
- | Comment
- | End
-;;
-
-type s_option =
- No_backslash_escaping
- | Return_comments
- | Recognize_encoded_words
-;;
-
-type s_extended_token =
- { token : s_token;
- token_pos : int;
- token_line : int;
- token_linepos : int; (* Position of the beginning of the line *)
- token_len : int;
- mutable token_sep : bool; (* separates adjacent encoded words *)
- }
-;;
-
-let get_token et = et.token;;
-let get_pos et = et.token_pos;;
-let get_line et = et.token_line;;
-let get_column et = et.token_pos - et.token_linepos;;
-let get_length et = et.token_len;;
-let separates_adjacent_encoded_words et = et.token_sep;;
-
-let get_decoded_word et =
- match et.token with
- Atom s -> s
- | QString s -> s
- | Control c -> String.make 1 c
- | Special c -> String.make 1 c
- | DomainLiteral s -> s
- | Comment -> ""
- | EncodedWord (_, encoding, content) ->
- ( match encoding with
- ("Q"|"q") ->
- Netencoding.Q.decode content
- | ("B"|"b") ->
- Netencoding.Base64.decode
- ~url_variant:false
- ~accept_spaces:false
- content
- | _ -> failwith "get_decoded_word"
- )
- | End ->
- failwith "get_decoded_word"
-;;
-
-let get_charset et =
- match et.token with
- EncodedWord (charset, _, _) -> charset
- | End -> failwith "get_charset"
- | _ -> "US-ASCII"
-;;
-
-type scanner_spec =
- { (* What the user specifies: *)
- scanner_specials : char list;
- scanner_options : s_option list;
- (* Derived from that: *)
- mutable opt_no_backslash_escaping : bool;
- mutable opt_return_comments : bool;
- mutable opt_recognize_encoded_words : bool;
-
- mutable is_special : bool array;
- mutable space_is_special : bool;
- }
-;;
-
-type scanner_target =
- { scanned_string : string;
- mutable scanner_pos : int;
- mutable scanner_line : int;
- mutable scanner_linepos : int;
- (* Position of the beginning of the line *)
- mutable scanned_tokens : s_extended_token Queue.t;
- (* A queue of already scanned tokens in order to look ahead *)
- mutable last_token : s_token;
- (* The last returned token. It is only important whether it is
- * EncodedWord or not.
- *)
- }
-;;
-
-type mime_scanner = scanner_spec * scanner_target
-;;
-
-let get_pos_of_scanner (spec, target) =
- if spec.opt_recognize_encoded_words then
- failwith "get_pos_of_scanner"
- else
- target.scanner_pos
-;;
-
-let get_line_of_scanner (spec, target) =
- if spec.opt_recognize_encoded_words then
- failwith "get_line_of_scanner"
- else
- target.scanner_line
-;;
-
-let get_column_of_scanner (spec, target) =
- if spec.opt_recognize_encoded_words then
- failwith "get_column_of_scanner"
- else
- target.scanner_pos - target.scanner_linepos
-;;
-
-let create_mime_scanner ~specials ~scan_options =
- let is_spcl = Array.create 256 false in
- List.iter
- (fun c -> is_spcl.( Char.code c ) <- true)
- specials;
- let spec =
- { scanner_specials = specials;
- scanner_options = scan_options;
- opt_no_backslash_escaping =
- List.mem No_backslash_escaping scan_options;
- opt_return_comments =
- List.mem Return_comments scan_options;
- opt_recognize_encoded_words =
- List.mem Recognize_encoded_words scan_options;
- is_special = is_spcl;
- space_is_special = is_spcl.(32);
- }
- in
- (* Grab the remaining arguments: *)
- fun ?(pos=0) ?(line=1) ?(column=0) s ->
- let target =
- { scanned_string = s;
- scanner_pos = pos;
- scanner_line = line;
- scanner_linepos = pos - column;
- scanned_tokens = Queue.create();
- last_token = Comment; (* Must not be initialized with EncodedWord *)
- }
- in
- spec, target
-;;
-
-
-let encoded_word_re =
- Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
-
-let scan_next_token ((spec,target) as scn) =
- let mk_pair t len =
- { token = t;
- token_pos = target.scanner_pos;
- token_line = target.scanner_line;
- token_linepos = target.scanner_linepos;
- token_len = len;
- token_sep = false;
- },
- t
- in
-
- (* Note: mk_pair creates a new token pair, and it assumes that
- * target.scanner_pos (and also scanner_line and scanner_linepos)
- * still contain the position of the beginning of the token.
- *)
-
- let s = target.scanned_string in
- let l = String.length s in
- let rec scan i =
- if i < l then begin
- let c = s.[i] in
- if spec.is_special.( Char.code c ) then begin
- let pair = mk_pair (Special c) 1 in
- target.scanner_pos <- target.scanner_pos + 1;
- (match c with
- '\n' ->
- target.scanner_line <- target.scanner_line + 1;
- target.scanner_linepos <- target.scanner_pos;
- | _ -> ()
- );
- pair
- end
- else
- match c with
- '"' ->
- (* Quoted string: *)
- scan_qstring (i+1) (i+1) 0
- | '(' ->
- (* Comment: *)
- let i', line, linepos =
- scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
- in
- let advance() =
- target.scanner_pos <- i';
- target.scanner_line <- line;
- target.scanner_linepos <- linepos
- in
- if spec.opt_return_comments then begin
- let pair = mk_pair Comment (i' - i) in
- advance();
- pair
- end
- else
- if spec.space_is_special then begin
- let pair = mk_pair (Special ' ') (i' - i) in
- advance();
- pair
- end
- else begin
- advance();
- scan i'
- end
- | (' '|'\t'|'\r') ->
- (* Ignore whitespace by default: *)
- target.scanner_pos <- target.scanner_pos + 1;
- scan (i+1)
- | '\n' ->
- (* Ignore whitespace by default: *)
- target.scanner_pos <- target.scanner_pos + 1;
- target.scanner_line <- target.scanner_line + 1;
- target.scanner_linepos <- target.scanner_pos;
- scan (i+1)
- | ('\000'..'\031'|'\127'..'\255') ->
- let pair = mk_pair (Control c) 1 in
- target.scanner_pos <- target.scanner_pos + 1;
- pair
- | '[' ->
- (* Domain literal: *)
- scan_dliteral (i+1) (i+1) 0
- | _ ->
- scan_atom i i
- end
- else
- mk_pair End 0
-
- and scan_atom i0 i =
- let return_atom() =
- let astring = String.sub s i0 (i-i0) in
- let r =
- if spec.opt_recognize_encoded_words then
- Str.string_match ~groups:4 encoded_word_re astring 0
- else
- None
- in
- match r with
- None ->
- (* An atom contains never a linefeed character, so we can ignore
- * scanner_line here.
- *)
- let pair = mk_pair (Atom astring) (i-i0) in
- target.scanner_pos <- i;
- pair
- | Some mr ->
- (* Found an encoded word. *)
- let charset = Str.matched_group mr 1 astring in
- let encoding = Str.matched_group mr 2 astring in
- let content = Str.matched_group mr 3 astring in
- let t = EncodedWord(String.uppercase charset,
- String.uppercase encoding,
- content) in
- let pair = mk_pair t (i-i0) in
- target.scanner_pos <- i;
- pair
- in
-
- if i < l then
- let c = s.[i] in
- match c with
- ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
- return_atom()
- | _ ->
- if spec.is_special.( Char.code c ) then
- return_atom()
- else
- scan_atom i0 (i+1)
- else
- return_atom()
-
- and scan_qstring i0 i n =
- if i < l then
- let c = s.[i] in
- match c with
- '"' ->
- (* Regular end of the quoted string: *)
- let content, line, linepos = copy_qstring i0 (i-1) n in
- let pair = mk_pair (QString content) (i-i0+2) in
- target.scanner_pos <- i+1;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
- | '\\' when not spec.opt_no_backslash_escaping ->
- scan_qstring i0 (i+2) (n+1)
- | _ ->
- scan_qstring i0 (i+1) (n+1)
- else
- (* Missing right double quote *)
- let content, line, linepos = copy_qstring i0 (l-1) n in
- let pair = mk_pair (QString content) (l-i0+1) in
- target.scanner_pos <- l;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
-
- and copy_qstring i0 i1 n =
- (* Used for quoted strings and for domain literals *)
- let r = String.create n in
- let k = ref 0 in
- let line = ref target.scanner_line in
- let linepos = ref target.scanner_linepos in
- for i = i0 to i1 do
- let c = s.[i] in
- match c with
- '\\' when i < i1 && not spec.opt_no_backslash_escaping -> ()
- | '\n' ->
- line := !line + 1;
- linepos := i+1;
- r.[ !k ] <- c;
- incr k
- | _ ->
- r.[ !k ] <- c;
- incr k
- done;
- assert (!k = n);
- r, !line, !linepos
-
- and scan_dliteral i0 i n =
- if i < l then
- let c = s.[i] in
- match c with
- ']' ->
- (* Regular end of the domain literal: *)
- let content, line, linepos = copy_qstring i0 (i-1) n in
- let pair = mk_pair (DomainLiteral content) (i-i0+2) in
- target.scanner_pos <- i+1;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
- | '\\' when not spec.opt_no_backslash_escaping ->
- scan_dliteral i0 (i+2) (n+1)
- | _ ->
- (* Note: '[' is not allowed by RFC 822; we treat it here as
- * a regular character (questionable)
- *)
- scan_dliteral i0 (i+1) (n+1)
- else
- (* Missing right bracket *)
- let content, line, linepos = copy_qstring i0 (l-1) n in
- let pair = mk_pair (DomainLiteral content) (l-i0+1) in
- target.scanner_pos <- l;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
-
-
- and scan_comment i level line linepos =
- if i < l then
- let c = s.[i] in
- match c with
- ')' ->
- (i+1), line, linepos
- | '(' ->
- (* nested comment *)
- let i', line', linepos' =
- scan_comment (i+1) (level+1) line linepos
- in
- scan_comment i' level line' linepos'
- | '\\' when not spec.opt_no_backslash_escaping ->
- if (i+1) < l && s.[i+1] = '\n' then
- scan_comment (i+2) level (line+1) (i+2)
- else
- scan_comment (i+2) level line linepos
- | '\n' ->
- scan_comment (i+1) level (line+1) (i+1)
- | _ ->
- scan_comment (i+1) level line linepos
- else
- (* Missing closing ')' *)
- i, line, linepos
- in
-
- scan target.scanner_pos
-;;
-
-
-let scan_token ((spec,target) as scn) =
- (* This function handles token queueing in order to recognize white space
- * that separates adjacent encoded words.
- *)
-
- let rec collect_whitespace () =
- (* Scans whitespace tokens and returns them as:
- * (ws_list, other_tok) if there is some other_tok following the
- * list (other_tok = End is possible)
- *)
- let (et, t) as pair = scan_next_token scn in
- ( match t with
- (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
- let ws_list, tok = collect_whitespace() in
- pair :: ws_list, tok
- | _ ->
- [], pair
- )
- in
-
- try
- (* Is there an already scanned token in the queue? *)
- let et = Queue.take target.scanned_tokens in
- let t = et.token in
- target.last_token <- t;
- et, et.token
- with
- Queue.Empty ->
- (* If not: inspect the last token. If that token is an EncodedWord,
- * the next tokens are scanned in advance to determine if there
- * are spaces separating two EncodedWords. These tokens are put
- * into the queue such that it is avoided that they are scanned
- * twice. (The sole purpose of the queue.)
- *)
- match target.last_token with
- EncodedWord(_,_,_) as ew ->
- let ws_list, tok = collect_whitespace() in
- (* If tok is an EncodedWord, too, the tokens in ws_list must
- * be flagged as separating two adjacent encoded words.
- *)
- ( match tok with
- _, EncodedWord(_,_,_) ->
- List.iter
- (fun (et,t) ->
- et.token_sep <- true)
- ws_list
- | _ ->
- ()
- );
- (* Anyway, queue the read tokens but the first up *)
- ( match ws_list with
- [] ->
- (* Nothing to queue *)
- let et, t = tok in
- target.last_token <- t;
- tok
- | (et,t) as pair :: ws_list' ->
- List.iter
- (fun (et',_) ->
- Queue.add et' target.scanned_tokens)
- ws_list';
- ( match tok with
- | _, End ->
- ()
- | (et',_) ->
- Queue.add et' target.scanned_tokens
- );
- (* Return the first scanned token *)
- target.last_token <- t;
- pair
- )
- | _ ->
- (* Regular case: Scan one token; do not queue it up *)
- let (et, t) as pair = scan_next_token scn in
- target.last_token <- t;
- pair
-;;
-
-
-let scan_token_list scn =
- let rec collect() =
- match scan_token scn with
- _, End ->
- []
- | pair ->
- pair :: collect()
- in
- collect()
-;;
-
-
-let scan_structured_value s specials options =
- let rec collect scn =
- match scan_token scn with
- _, End ->
- []
- | _, t ->
- t :: collect scn
- in
- let scn = create_mime_scanner specials options s in
- collect scn
-;;
-
-
-let specials_rfc822 =
- [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
-
-
-let specials_rfc2045 =
- [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
-
-
-let scan_encoded_text_value s =
- let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
- let options = [ Recognize_encoded_words ] in
- let scn = create_mime_scanner specials options s in
-
- let rec collect () =
- match scan_token scn with
- _, End ->
- []
- | et, _ when separates_adjacent_encoded_words et ->
- collect()
- | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
- et :: collect ()
- | _, _ ->
- assert false
- in
- collect()
-;;
-
-
-let scan_value_with_parameters s options =
- let rec parse_params tl =
- match tl with
- Atom n :: Special '=' :: Atom v :: tl' ->
- (n,v) :: parse_rest tl'
- | Atom n :: Special '=' :: QString v :: tl' ->
- (n,v) :: parse_rest tl'
- | _ ->
- failwith "Mimestring.scan_value_with_parameters"
- and parse_rest tl =
- match tl with
- [] -> []
- | Special ';' :: tl' ->
- parse_params tl'
- | _ ->
- failwith "Mimestring.scan_value_with_parameters"
- in
-
- (* Note: Even if not used here, the comma is a very common separator
- * and should be recognized as being special. You will get a
- * failure if there is a comma in the scanned string.
- *)
- let tl = scan_structured_value s [ ';'; '='; ',' ] options in
- match tl with
- [ Atom n ] -> n, []
- | [ QString n ] -> n, []
- | Atom n :: Special ';' :: tl' ->
- n, parse_params tl'
- | QString n :: Special ';' :: tl' ->
- n, parse_params tl'
- | _ ->
- failwith "Mimestring.scan_value_with_parameters"
-;;
-
-
-let scan_mime_type s options =
- let n, params = scan_value_with_parameters s options in
- (String.lowercase n),
- (List.map (fun (n,v) -> (String.lowercase n, v)) params)
-;;
-
-
-let lf_re = Str.regexp "[\n]";;
-
-let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
- let l_s = String.length s in
- if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then
- invalid_arg "Mimestring.scan_multipart_body";
-
- (* First compile the regexps scanning for 'boundary': *)
- let boundary1_re =
- Str.regexp ("\n--" ^ Str.quote boundary) in
- let boundary2_re =
- Str.regexp ("--" ^ Str.quote boundary) in
-
- let rec parse i =
- (* i: Beginning of the current part (position directly after the
- * boundary line
- *)
- (* Search for next boundary at position i *)
- let i' =
- try min (fst (Str.search_forward boundary1_re s i) + 1) i1
- with
- Not_found -> i1
- in
- (* i': Either the position of the first '-' of the boundary line,
- * or i1 if no boundary has been found
- *)
- if i' >= i1 then
- [] (* Ignore everything after the last boundary *)
- else
- let i'' =
- try min (fst (Str.search_forward lf_re s i') + 1) i1
- with
- Not_found -> i1
- in
- (* i'': The position after the boundary line *)
-(*
- print_int i; print_newline();
- print_int i'; print_newline();
- print_int i''; print_newline();
- flush stdout;
-*)
- let header, k = scan_header s i i' in
- (* header: the header of the part
- * k: beginning of the body
- *)
-
- let value =
- (* We know that i'-1 is a linefeed character. i'-2 should be a CR
- * character. Both characters are not part of the value.
- *)
- if i' >= 2 then
- match s.[i'-2] with
- '\013' -> String.sub s k (i'-2-k)
- | _ -> String.sub s k (i'-1-k)
- else
- String.sub s k (i'-1-k)
- in
-
- let pair =
- (header, value) in
-
- if i'' >= i1
- then
- [ pair ]
- else
- pair :: parse i''
- in
-
- (* Find the first boundary. This is a special case, because it may be
- * right at the beginning of the string (no preceding CRLF)
- *)
-
- let i_bnd =
- if Str.string_partial_match boundary2_re s i0 <> None then
- i0
- else
- try min (fst (Str.search_forward boundary1_re s i0)) i1
- with
- Not_found -> i1
- in
-
- if i_bnd >= i1 then
- []
- else
- let i_bnd' =
- try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
- with
- Not_found -> i1
- in
- if i_bnd' >= i1 then
- []
- else
- parse i_bnd'
-;;
-
-
-let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
- let parts = scan_multipart_body s i0 i1 boundary in
- List.map
- (fun (params, value) ->
- let encoding =
- try List.assoc "content-transfer-encoding" params
- with Not_found -> "7bit"
- in
-
- (* NOTE: In the case of "base64" and "quoted-printable", the allocation
- * of the string "value" could be avoided.
- *)
-
- let value' =
- match encoding with
- ("7bit"|"8bit"|"binary") -> value
- | "base64" ->
- Netencoding.Base64.decode_substring
- value 0 (String.length value) false true
- | "quoted-printable" ->
- Netencoding.QuotedPrintable.decode_substring
- value 0 (String.length value)
- | _ ->
- failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
- in
- (params, value')
- )
- parts
-;;
-
-
-let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
-
- (* The block size of s must be at least the length of the boundary + 3.
- * Otherwise it is not guaranteed that the boundary is always recognized.
- *)
- if Netstream.block_size s < String.length boundary + 3 then
- invalid_arg "Mimestring.scan_multipart_body_from_netstream";
-
- (* First compile the regexps scanning for 'boundary': *)
- let boundary1_re =
- Str.regexp ("\n--" ^ Str.quote boundary) in
- let boundary2_re =
- Str.regexp ("--" ^ Str.quote boundary) in
-
- (* Subtask 1: Search the end of the MIME header: CR LF CR LF
- * (or LF LF). Enlarge the window until the complete header
- * is covered by the window.
- *)
- let rec search_end_of_header k =
- (* Search the end of the header beginning at position k of the
- * current window.
- * Return the position of the first character of the body.
- *)
- try
- (* Search for LF CR? LF: *)
- let i, r = Str.search_forward
- end_of_header_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- k
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then
- Str.match_end r
- else
- raise Not_found
- with
- Not_found ->
- (* If the end of the stream is reached, the end of the header
- * is missing: Error.
- * Otherwise, we try to read another block, and continue.
- *)
- if Netstream.at_eos s then
- failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
- let w0 = Netstream.window_length s in
- Netstream.want_another_block s;
- search_end_of_header (max (w0 - 2) 0)
- in
-
- (* Subtask 2: Search the first boundary line. *)
- let rec search_first_boundary() =
- (* Search boundary per regexp; return the position of the character
- * immediately following the boundary (on the same line), or
- * raise Not_found.
- *)
- try
- (* Search boundary per regexp: *)
- let i, r = Str.search_forward
- boundary1_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- 0
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then begin
- Str.match_end r
- end
- else raise Not_found
- with
- Not_found ->
- if Netstream.at_eos s then raise Not_found;
- (* The regexp did not match: Move the window by one block.
- *)
- let n =
- min
- (Netstream.window_length s)
- (Netstream.block_size s)
- in
- Netstream.move s n;
- search_first_boundary()
- in
-
- (* Subtask 3: Search the next boundary line. Invoke 'add' for every
- * read chunk
- *)
- let rec search_next_boundary p =
- (* Returns the position directly after the boundary on the same line *)
- try
- (* Search boundary per regexp: *)
- let i,r = Str.search_forward
- boundary1_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- 0
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then begin
- (* Add the last chunk of the part. *)
- let n =
- (* i is a LF. i - 1 should be CR. Ignore these characters. *)
- if i >= 1 then
- match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
- '\013' -> i - 1
- | _ -> i
- else
- i
- in
- (* Printf.printf "add n=%d\n" n; *)
- add p s 0 n;
- Str.match_end r
- end
- else raise Not_found
- with
- Not_found ->
- if Netstream.at_eos s then
- failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
- (* The regexp did not match: Add the first block of the window;
- * and move the window.
- *)
- let n =
- min
- (Netstream.window_length s)
- (Netstream.block_size s)
- in
- (* Printf.printf "add n=%d\n" n; *)
- add p s 0 n;
- Netstream.move s n;
- search_next_boundary p
- in
-
- (* Subtask 4: Search the end of the boundary line *)
- let rec search_end_of_line k =
- (* Search LF beginning at position k. Discard any contents until that. *)
- try
- let i,r = Str.search_forward
- lf_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- k
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then begin
- Str.match_end r
- end
- else raise Not_found
- with
- Not_found ->
- if Netstream.at_eos s then
- failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
- (* The regexp did not match: move the window.
- *)
- let n = Netstream.window_length s in
- Netstream.move s n;
- search_end_of_line 0
- in
-
- (* Subtask 5: Check whether "--" follows the boundary on the same line *)
- let check_whether_last_boundary k =
- (* k: The position directly after the boundary. *)
- Netstream.want s (k+2);
- let str = Netbuffer.unsafe_buffer (Netstream.window s) in
- (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
- in
-
- (* Subtask 6: Check whether the buffer begins with a boundary. *)
- let check_beginning_is_boundary () =
- let m = String.length boundary + 2 in
- Netstream.want s m;
- let str = Netbuffer.unsafe_buffer (Netstream.window s) in
- (Netstream.window_length s >= m) &&
- (Str.string_partial_match boundary2_re str 0 <> None)
- in
-
- let rec parse_part () =
- (* The first byte of the current window of s contains the character
- * directly following the boundary line that starts this part.
- *)
- (* Search the end of the MIME header: *)
- let k_eoh = search_end_of_header 0 in
- (* Printf.printf "k_eoh=%d\n" k_eoh; *)
- (* Get the MIME header: *)
- let str = Netbuffer.unsafe_buffer (Netstream.window s) in
- let header, k_eoh' = scan_header str 0 k_eoh in
- assert (k_eoh = k_eoh');
- (* Move the window over the header: *)
- Netstream.move s k_eoh;
- (* Create the part: *)
- let p = create header in
- let continue =
- begin try
- (* Search the next boundary; add the chunks while searching: *)
- let k_eob = search_next_boundary p in
- (* Printf.printf "k_eob=%d\n" k_eob; *)
- (* Is this the last boundary? *)
- if check_whether_last_boundary k_eob then begin
- (* Skip the rest: *)
- while not (Netstream.at_eos s) do
- Netstream.move s (Netstream.window_length s)
- done;
- Netstream.move s (Netstream.window_length s);
- false
- end
- else begin
- (* Move to the beginning of the next line: *)
- let k_eol = search_end_of_line k_eob in
- Netstream.move s k_eol;
- true
- end
- with
- any ->
- (try stop p with _ -> ());
- raise any
- end in
- stop p;
- if continue then
- (* Continue with next part: *)
- parse_part()
- in
-
- (* Check whether s directly begins with a boundary: *)
- if check_beginning_is_boundary() then begin
- (* Move to the beginning of the next line: *)
- let k_eol = search_end_of_line 0 in
- Netstream.move s k_eol;
- (* Begin with first part: *)
- parse_part()
- end
- else begin
- (* Search the first boundary: *)
- try
- let k_eob = search_first_boundary() in
- (* Printf.printf "k_eob=%d\n" k_eob; *)
- (* Move to the beginning of the next line: *)
- let k_eol = search_end_of_line k_eob in
- (* Printf.printf "k_eol=%d\n" k_eol; *)
- Netstream.move s k_eol;
- (* Begin with first part: *)
- parse_part()
- with
- Not_found ->
- (* No boundary at all: The body is empty. *)
- ()
- end;
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/08/13 00:04:36 gerd
- * Encoded_word -> EncodedWord
- * Bugfixes.
- *
- * Revision 1.7 2000/08/07 00:25:14 gerd
- * Implemented the new functions for structured field lexing.
- *
- * Revision 1.6 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.5 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.4 2000/05/16 22:30:14 gerd
- * Added support for some types of malformed MIME messages.
- *
- * Revision 1.3 2000/04/15 13:09:01 gerd
- * Implemented uploads to temporary files.
- *
- * Revision 1.2 2000/03/02 01:15:30 gerd
- * Updated.
- *
- * Revision 1.1 2000/02/25 15:21:12 gerd
- * Initial revision.
- *
- *
- *)