--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Str = Netstring_str;;
+
+let cr_or_lf_re = Str.regexp "[\013\n]";;
+
+let trim_right_spaces_re =
+ Str.regexp "[ \t]+$";;
+
+let trim_left_spaces_re =
+ Str.regexp "^[ \t]+";;
+
+let header_re =
+ Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
+
+let empty_line_re =
+ Str.regexp "\013?\n";;
+
+let end_of_header_re =
+ Str.regexp "\n\013?\n";;
+
+
+let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
+ let rec parse_header i l =
+ match Str.string_partial_match header_re parstr i with
+ Some r ->
+ let i' = Str.match_end r in
+ if i' > i1 then
+ failwith "Mimestring.scan_header";
+ let name = String.lowercase(Str.matched_group r 1 parstr) in
+ let value_with_crlf =
+ Str.matched_group r 2 parstr in
+ let value =
+ if unfold then begin
+ let value_with_rspaces =
+ Str.global_replace cr_or_lf_re "" value_with_crlf in
+ let value_with_lspaces =
+ Str.global_replace trim_right_spaces_re "" value_with_rspaces in
+ Str.global_replace trim_left_spaces_re "" value_with_lspaces
+ end
+ else value_with_crlf
+ in
+ parse_header i' ( (name,value) :: l)
+ | None ->
+ (* The header must end with an empty line *)
+ begin match Str.string_partial_match empty_line_re parstr i with
+ Some r' ->
+ List.rev l, Str.match_end r'
+ | None ->
+ failwith "Mimestring.scan_header"
+ end
+ in
+ parse_header i0 []
+;;
+
+type s_token =
+ Atom of string
+ | EncodedWord of (string * string * string)
+ | QString of string
+ | Control of char
+ | Special of char
+ | DomainLiteral of string
+ | Comment
+ | End
+;;
+
+type s_option =
+ No_backslash_escaping
+ | Return_comments
+ | Recognize_encoded_words
+;;
+
+type s_extended_token =
+ { token : s_token;
+ token_pos : int;
+ token_line : int;
+ token_linepos : int; (* Position of the beginning of the line *)
+ token_len : int;
+ mutable token_sep : bool; (* separates adjacent encoded words *)
+ }
+;;
+
+let get_token et = et.token;;
+let get_pos et = et.token_pos;;
+let get_line et = et.token_line;;
+let get_column et = et.token_pos - et.token_linepos;;
+let get_length et = et.token_len;;
+let separates_adjacent_encoded_words et = et.token_sep;;
+
+let get_decoded_word et =
+ match et.token with
+ Atom s -> s
+ | QString s -> s
+ | Control c -> String.make 1 c
+ | Special c -> String.make 1 c
+ | DomainLiteral s -> s
+ | Comment -> ""
+ | EncodedWord (_, encoding, content) ->
+ ( match encoding with
+ ("Q"|"q") ->
+ Netencoding.Q.decode content
+ | ("B"|"b") ->
+ Netencoding.Base64.decode
+ ~url_variant:false
+ ~accept_spaces:false
+ content
+ | _ -> failwith "get_decoded_word"
+ )
+ | End ->
+ failwith "get_decoded_word"
+;;
+
+let get_charset et =
+ match et.token with
+ EncodedWord (charset, _, _) -> charset
+ | End -> failwith "get_charset"
+ | _ -> "US-ASCII"
+;;
+
+type scanner_spec =
+ { (* What the user specifies: *)
+ scanner_specials : char list;
+ scanner_options : s_option list;
+ (* Derived from that: *)
+ mutable opt_no_backslash_escaping : bool;
+ mutable opt_return_comments : bool;
+ mutable opt_recognize_encoded_words : bool;
+
+ mutable is_special : bool array;
+ mutable space_is_special : bool;
+ }
+;;
+
+type scanner_target =
+ { scanned_string : string;
+ mutable scanner_pos : int;
+ mutable scanner_line : int;
+ mutable scanner_linepos : int;
+ (* Position of the beginning of the line *)
+ mutable scanned_tokens : s_extended_token Queue.t;
+ (* A queue of already scanned tokens in order to look ahead *)
+ mutable last_token : s_token;
+ (* The last returned token. It is only important whether it is
+ * EncodedWord or not.
+ *)
+ }
+;;
+
+type mime_scanner = scanner_spec * scanner_target
+;;
+
+let get_pos_of_scanner (spec, target) =
+ if spec.opt_recognize_encoded_words then
+ failwith "get_pos_of_scanner"
+ else
+ target.scanner_pos
+;;
+
+let get_line_of_scanner (spec, target) =
+ if spec.opt_recognize_encoded_words then
+ failwith "get_line_of_scanner"
+ else
+ target.scanner_line
+;;
+
+let get_column_of_scanner (spec, target) =
+ if spec.opt_recognize_encoded_words then
+ failwith "get_column_of_scanner"
+ else
+ target.scanner_pos - target.scanner_linepos
+;;
+
+let create_mime_scanner ~specials ~scan_options =
+ let is_spcl = Array.create 256 false in
+ List.iter
+ (fun c -> is_spcl.( Char.code c ) <- true)
+ specials;
+ let spec =
+ { scanner_specials = specials;
+ scanner_options = scan_options;
+ opt_no_backslash_escaping =
+ List.mem No_backslash_escaping scan_options;
+ opt_return_comments =
+ List.mem Return_comments scan_options;
+ opt_recognize_encoded_words =
+ List.mem Recognize_encoded_words scan_options;
+ is_special = is_spcl;
+ space_is_special = is_spcl.(32);
+ }
+ in
+ (* Grab the remaining arguments: *)
+ fun ?(pos=0) ?(line=1) ?(column=0) s ->
+ let target =
+ { scanned_string = s;
+ scanner_pos = pos;
+ scanner_line = line;
+ scanner_linepos = pos - column;
+ scanned_tokens = Queue.create();
+ last_token = Comment; (* Must not be initialized with EncodedWord *)
+ }
+ in
+ spec, target
+;;
+
+
+let encoded_word_re =
+ Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
+
+let scan_next_token ((spec,target) as scn) =
+ let mk_pair t len =
+ { token = t;
+ token_pos = target.scanner_pos;
+ token_line = target.scanner_line;
+ token_linepos = target.scanner_linepos;
+ token_len = len;
+ token_sep = false;
+ },
+ t
+ in
+
+ (* Note: mk_pair creates a new token pair, and it assumes that
+ * target.scanner_pos (and also scanner_line and scanner_linepos)
+ * still contain the position of the beginning of the token.
+ *)
+
+ let s = target.scanned_string in
+ let l = String.length s in
+ let rec scan i =
+ if i < l then begin
+ let c = s.[i] in
+ if spec.is_special.( Char.code c ) then begin
+ let pair = mk_pair (Special c) 1 in
+ target.scanner_pos <- target.scanner_pos + 1;
+ (match c with
+ '\n' ->
+ target.scanner_line <- target.scanner_line + 1;
+ target.scanner_linepos <- target.scanner_pos;
+ | _ -> ()
+ );
+ pair
+ end
+ else
+ match c with
+ '"' ->
+ (* Quoted string: *)
+ scan_qstring (i+1) (i+1) 0
+ | '(' ->
+ (* Comment: *)
+ let i', line, linepos =
+ scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
+ in
+ let advance() =
+ target.scanner_pos <- i';
+ target.scanner_line <- line;
+ target.scanner_linepos <- linepos
+ in
+ if spec.opt_return_comments then begin
+ let pair = mk_pair Comment (i' - i) in
+ advance();
+ pair
+ end
+ else
+ if spec.space_is_special then begin
+ let pair = mk_pair (Special ' ') (i' - i) in
+ advance();
+ pair
+ end
+ else begin
+ advance();
+ scan i'
+ end
+ | (' '|'\t'|'\r') ->
+ (* Ignore whitespace by default: *)
+ target.scanner_pos <- target.scanner_pos + 1;
+ scan (i+1)
+ | '\n' ->
+ (* Ignore whitespace by default: *)
+ target.scanner_pos <- target.scanner_pos + 1;
+ target.scanner_line <- target.scanner_line + 1;
+ target.scanner_linepos <- target.scanner_pos;
+ scan (i+1)
+ | ('\000'..'\031'|'\127'..'\255') ->
+ let pair = mk_pair (Control c) 1 in
+ target.scanner_pos <- target.scanner_pos + 1;
+ pair
+ | '[' ->
+ (* Domain literal: *)
+ scan_dliteral (i+1) (i+1) 0
+ | _ ->
+ scan_atom i i
+ end
+ else
+ mk_pair End 0
+
+ and scan_atom i0 i =
+ let return_atom() =
+ let astring = String.sub s i0 (i-i0) in
+ let r =
+ if spec.opt_recognize_encoded_words then
+ Str.string_match ~groups:4 encoded_word_re astring 0
+ else
+ None
+ in
+ match r with
+ None ->
+ (* An atom contains never a linefeed character, so we can ignore
+ * scanner_line here.
+ *)
+ let pair = mk_pair (Atom astring) (i-i0) in
+ target.scanner_pos <- i;
+ pair
+ | Some mr ->
+ (* Found an encoded word. *)
+ let charset = Str.matched_group mr 1 astring in
+ let encoding = Str.matched_group mr 2 astring in
+ let content = Str.matched_group mr 3 astring in
+ let t = EncodedWord(String.uppercase charset,
+ String.uppercase encoding,
+ content) in
+ let pair = mk_pair t (i-i0) in
+ target.scanner_pos <- i;
+ pair
+ in
+
+ if i < l then
+ let c = s.[i] in
+ match c with
+ ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
+ return_atom()
+ | _ ->
+ if spec.is_special.( Char.code c ) then
+ return_atom()
+ else
+ scan_atom i0 (i+1)
+ else
+ return_atom()
+
+ and scan_qstring i0 i n =
+ if i < l then
+ let c = s.[i] in
+ match c with
+ '"' ->
+ (* Regular end of the quoted string: *)
+ let content, line, linepos = copy_qstring i0 (i-1) n in
+ let pair = mk_pair (QString content) (i-i0+2) in
+ target.scanner_pos <- i+1;
+ target.scanner_line <- line;
+ target.scanner_linepos <- linepos;
+ pair
+ | '\\' when not spec.opt_no_backslash_escaping ->
+ scan_qstring i0 (i+2) (n+1)
+ | _ ->
+ scan_qstring i0 (i+1) (n+1)
+ else
+ (* Missing right double quote *)
+ let content, line, linepos = copy_qstring i0 (l-1) n in
+ let pair = mk_pair (QString content) (l-i0+1) in
+ target.scanner_pos <- l;
+ target.scanner_line <- line;
+ target.scanner_linepos <- linepos;
+ pair
+
+ and copy_qstring i0 i1 n =
+ (* Used for quoted strings and for domain literals *)
+ let r = String.create n in
+ let k = ref 0 in
+ let line = ref target.scanner_line in
+ let linepos = ref target.scanner_linepos in
+ for i = i0 to i1 do
+ let c = s.[i] in
+ match c with
+ '\\' when i < i1 && not spec.opt_no_backslash_escaping -> ()
+ | '\n' ->
+ line := !line + 1;
+ linepos := i+1;
+ r.[ !k ] <- c;
+ incr k
+ | _ ->
+ r.[ !k ] <- c;
+ incr k
+ done;
+ assert (!k = n);
+ r, !line, !linepos
+
+ and scan_dliteral i0 i n =
+ if i < l then
+ let c = s.[i] in
+ match c with
+ ']' ->
+ (* Regular end of the domain literal: *)
+ let content, line, linepos = copy_qstring i0 (i-1) n in
+ let pair = mk_pair (DomainLiteral content) (i-i0+2) in
+ target.scanner_pos <- i+1;
+ target.scanner_line <- line;
+ target.scanner_linepos <- linepos;
+ pair
+ | '\\' when not spec.opt_no_backslash_escaping ->
+ scan_dliteral i0 (i+2) (n+1)
+ | _ ->
+ (* Note: '[' is not allowed by RFC 822; we treat it here as
+ * a regular character (questionable)
+ *)
+ scan_dliteral i0 (i+1) (n+1)
+ else
+ (* Missing right bracket *)
+ let content, line, linepos = copy_qstring i0 (l-1) n in
+ let pair = mk_pair (DomainLiteral content) (l-i0+1) in
+ target.scanner_pos <- l;
+ target.scanner_line <- line;
+ target.scanner_linepos <- linepos;
+ pair
+
+
+ and scan_comment i level line linepos =
+ if i < l then
+ let c = s.[i] in
+ match c with
+ ')' ->
+ (i+1), line, linepos
+ | '(' ->
+ (* nested comment *)
+ let i', line', linepos' =
+ scan_comment (i+1) (level+1) line linepos
+ in
+ scan_comment i' level line' linepos'
+ | '\\' when not spec.opt_no_backslash_escaping ->
+ if (i+1) < l && s.[i+1] = '\n' then
+ scan_comment (i+2) level (line+1) (i+2)
+ else
+ scan_comment (i+2) level line linepos
+ | '\n' ->
+ scan_comment (i+1) level (line+1) (i+1)
+ | _ ->
+ scan_comment (i+1) level line linepos
+ else
+ (* Missing closing ')' *)
+ i, line, linepos
+ in
+
+ scan target.scanner_pos
+;;
+
+
+let scan_token ((spec,target) as scn) =
+ (* This function handles token queueing in order to recognize white space
+ * that separates adjacent encoded words.
+ *)
+
+ let rec collect_whitespace () =
+ (* Scans whitespace tokens and returns them as:
+ * (ws_list, other_tok) if there is some other_tok following the
+ * list (other_tok = End is possible)
+ *)
+ let (et, t) as pair = scan_next_token scn in
+ ( match t with
+ (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
+ let ws_list, tok = collect_whitespace() in
+ pair :: ws_list, tok
+ | _ ->
+ [], pair
+ )
+ in
+
+ try
+ (* Is there an already scanned token in the queue? *)
+ let et = Queue.take target.scanned_tokens in
+ let t = et.token in
+ target.last_token <- t;
+ et, et.token
+ with
+ Queue.Empty ->
+ (* If not: inspect the last token. If that token is an EncodedWord,
+ * the next tokens are scanned in advance to determine if there
+ * are spaces separating two EncodedWords. These tokens are put
+ * into the queue such that it is avoided that they are scanned
+ * twice. (The sole purpose of the queue.)
+ *)
+ match target.last_token with
+ EncodedWord(_,_,_) as ew ->
+ let ws_list, tok = collect_whitespace() in
+ (* If tok is an EncodedWord, too, the tokens in ws_list must
+ * be flagged as separating two adjacent encoded words.
+ *)
+ ( match tok with
+ _, EncodedWord(_,_,_) ->
+ List.iter
+ (fun (et,t) ->
+ et.token_sep <- true)
+ ws_list
+ | _ ->
+ ()
+ );
+ (* Anyway, queue the read tokens but the first up *)
+ ( match ws_list with
+ [] ->
+ (* Nothing to queue *)
+ let et, t = tok in
+ target.last_token <- t;
+ tok
+ | (et,t) as pair :: ws_list' ->
+ List.iter
+ (fun (et',_) ->
+ Queue.add et' target.scanned_tokens)
+ ws_list';
+ ( match tok with
+ | _, End ->
+ ()
+ | (et',_) ->
+ Queue.add et' target.scanned_tokens
+ );
+ (* Return the first scanned token *)
+ target.last_token <- t;
+ pair
+ )
+ | _ ->
+ (* Regular case: Scan one token; do not queue it up *)
+ let (et, t) as pair = scan_next_token scn in
+ target.last_token <- t;
+ pair
+;;
+
+
+let scan_token_list scn =
+ let rec collect() =
+ match scan_token scn with
+ _, End ->
+ []
+ | pair ->
+ pair :: collect()
+ in
+ collect()
+;;
+
+
+let scan_structured_value s specials options =
+ let rec collect scn =
+ match scan_token scn with
+ _, End ->
+ []
+ | _, t ->
+ t :: collect scn
+ in
+ let scn = create_mime_scanner specials options s in
+ collect scn
+;;
+
+
+let specials_rfc822 =
+ [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
+
+
+let specials_rfc2045 =
+ [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
+
+
+let scan_encoded_text_value s =
+ let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
+ let options = [ Recognize_encoded_words ] in
+ let scn = create_mime_scanner specials options s in
+
+ let rec collect () =
+ match scan_token scn with
+ _, End ->
+ []
+ | et, _ when separates_adjacent_encoded_words et ->
+ collect()
+ | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
+ et :: collect ()
+ | _, _ ->
+ assert false
+ in
+ collect()
+;;
+
+
+let scan_value_with_parameters s options =
+ let rec parse_params tl =
+ match tl with
+ Atom n :: Special '=' :: Atom v :: tl' ->
+ (n,v) :: parse_rest tl'
+ | Atom n :: Special '=' :: QString v :: tl' ->
+ (n,v) :: parse_rest tl'
+ | _ ->
+ failwith "Mimestring.scan_value_with_parameters"
+ and parse_rest tl =
+ match tl with
+ [] -> []
+ | Special ';' :: tl' ->
+ parse_params tl'
+ | _ ->
+ failwith "Mimestring.scan_value_with_parameters"
+ in
+
+ (* Note: Even if not used here, the comma is a very common separator
+ * and should be recognized as being special. You will get a
+ * failure if there is a comma in the scanned string.
+ *)
+ let tl = scan_structured_value s [ ';'; '='; ',' ] options in
+ match tl with
+ [ Atom n ] -> n, []
+ | [ QString n ] -> n, []
+ | Atom n :: Special ';' :: tl' ->
+ n, parse_params tl'
+ | QString n :: Special ';' :: tl' ->
+ n, parse_params tl'
+ | _ ->
+ failwith "Mimestring.scan_value_with_parameters"
+;;
+
+
+let scan_mime_type s options =
+ let n, params = scan_value_with_parameters s options in
+ (String.lowercase n),
+ (List.map (fun (n,v) -> (String.lowercase n, v)) params)
+;;
+
+
+let lf_re = Str.regexp "[\n]";;
+
+let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
+ let l_s = String.length s in
+ if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then
+ invalid_arg "Mimestring.scan_multipart_body";
+
+ (* First compile the regexps scanning for 'boundary': *)
+ let boundary1_re =
+ Str.regexp ("\n--" ^ Str.quote boundary) in
+ let boundary2_re =
+ Str.regexp ("--" ^ Str.quote boundary) in
+
+ let rec parse i =
+ (* i: Beginning of the current part (position directly after the
+ * boundary line
+ *)
+ (* Search for next boundary at position i *)
+ let i' =
+ try min (fst (Str.search_forward boundary1_re s i) + 1) i1
+ with
+ Not_found -> i1
+ in
+ (* i': Either the position of the first '-' of the boundary line,
+ * or i1 if no boundary has been found
+ *)
+ if i' >= i1 then
+ [] (* Ignore everything after the last boundary *)
+ else
+ let i'' =
+ try min (fst (Str.search_forward lf_re s i') + 1) i1
+ with
+ Not_found -> i1
+ in
+ (* i'': The position after the boundary line *)
+(*
+ print_int i; print_newline();
+ print_int i'; print_newline();
+ print_int i''; print_newline();
+ flush stdout;
+*)
+ let header, k = scan_header s i i' in
+ (* header: the header of the part
+ * k: beginning of the body
+ *)
+
+ let value =
+ (* We know that i'-1 is a linefeed character. i'-2 should be a CR
+ * character. Both characters are not part of the value.
+ *)
+ if i' >= 2 then
+ match s.[i'-2] with
+ '\013' -> String.sub s k (i'-2-k)
+ | _ -> String.sub s k (i'-1-k)
+ else
+ String.sub s k (i'-1-k)
+ in
+
+ let pair =
+ (header, value) in
+
+ if i'' >= i1
+ then
+ [ pair ]
+ else
+ pair :: parse i''
+ in
+
+ (* Find the first boundary. This is a special case, because it may be
+ * right at the beginning of the string (no preceding CRLF)
+ *)
+
+ let i_bnd =
+ if Str.string_partial_match boundary2_re s i0 <> None then
+ i0
+ else
+ try min (fst (Str.search_forward boundary1_re s i0)) i1
+ with
+ Not_found -> i1
+ in
+
+ if i_bnd >= i1 then
+ []
+ else
+ let i_bnd' =
+ try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
+ with
+ Not_found -> i1
+ in
+ if i_bnd' >= i1 then
+ []
+ else
+ parse i_bnd'
+;;
+
+
+let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
+ let parts = scan_multipart_body s i0 i1 boundary in
+ List.map
+ (fun (params, value) ->
+ let encoding =
+ try List.assoc "content-transfer-encoding" params
+ with Not_found -> "7bit"
+ in
+
+ (* NOTE: In the case of "base64" and "quoted-printable", the allocation
+ * of the string "value" could be avoided.
+ *)
+
+ let value' =
+ match encoding with
+ ("7bit"|"8bit"|"binary") -> value
+ | "base64" ->
+ Netencoding.Base64.decode_substring
+ value 0 (String.length value) false true
+ | "quoted-printable" ->
+ Netencoding.QuotedPrintable.decode_substring
+ value 0 (String.length value)
+ | _ ->
+ failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
+ in
+ (params, value')
+ )
+ parts
+;;
+
+
+let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
+
+ (* The block size of s must be at least the length of the boundary + 3.
+ * Otherwise it is not guaranteed that the boundary is always recognized.
+ *)
+ if Netstream.block_size s < String.length boundary + 3 then
+ invalid_arg "Mimestring.scan_multipart_body_from_netstream";
+
+ (* First compile the regexps scanning for 'boundary': *)
+ let boundary1_re =
+ Str.regexp ("\n--" ^ Str.quote boundary) in
+ let boundary2_re =
+ Str.regexp ("--" ^ Str.quote boundary) in
+
+ (* Subtask 1: Search the end of the MIME header: CR LF CR LF
+ * (or LF LF). Enlarge the window until the complete header
+ * is covered by the window.
+ *)
+ let rec search_end_of_header k =
+ (* Search the end of the header beginning at position k of the
+ * current window.
+ * Return the position of the first character of the body.
+ *)
+ try
+ (* Search for LF CR? LF: *)
+ let i, r = Str.search_forward
+ end_of_header_re
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ in
+ (* If match_end <= window_length, the search was successful.
+ * Otherwise, we searched in the uninitialized region of the
+ * buffer.
+ *)
+ if Str.match_end r <= Netstream.window_length s then
+ Str.match_end r
+ else
+ raise Not_found
+ with
+ Not_found ->
+ (* If the end of the stream is reached, the end of the header
+ * is missing: Error.
+ * Otherwise, we try to read another block, and continue.
+ *)
+ if Netstream.at_eos s then
+ failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
+ let w0 = Netstream.window_length s in
+ Netstream.want_another_block s;
+ search_end_of_header (max (w0 - 2) 0)
+ in
+
+ (* Subtask 2: Search the first boundary line. *)
+ let rec search_first_boundary() =
+ (* Search boundary per regexp; return the position of the character
+ * immediately following the boundary (on the same line), or
+ * raise Not_found.
+ *)
+ try
+ (* Search boundary per regexp: *)
+ let i, r = Str.search_forward
+ boundary1_re
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ 0
+ in
+ (* If match_end <= window_length, the search was successful.
+ * Otherwise, we searched in the uninitialized region of the
+ * buffer.
+ *)
+ if Str.match_end r <= Netstream.window_length s then begin
+ Str.match_end r
+ end
+ else raise Not_found
+ with
+ Not_found ->
+ if Netstream.at_eos s then raise Not_found;
+ (* The regexp did not match: Move the window by one block.
+ *)
+ let n =
+ min
+ (Netstream.window_length s)
+ (Netstream.block_size s)
+ in
+ Netstream.move s n;
+ search_first_boundary()
+ in
+
+ (* Subtask 3: Search the next boundary line. Invoke 'add' for every
+ * read chunk
+ *)
+ let rec search_next_boundary p =
+ (* Returns the position directly after the boundary on the same line *)
+ try
+ (* Search boundary per regexp: *)
+ let i,r = Str.search_forward
+ boundary1_re
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ 0
+ in
+ (* If match_end <= window_length, the search was successful.
+ * Otherwise, we searched in the uninitialized region of the
+ * buffer.
+ *)
+ if Str.match_end r <= Netstream.window_length s then begin
+ (* Add the last chunk of the part. *)
+ let n =
+ (* i is a LF. i - 1 should be CR. Ignore these characters. *)
+ if i >= 1 then
+ match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
+ '\013' -> i - 1
+ | _ -> i
+ else
+ i
+ in
+ (* Printf.printf "add n=%d\n" n; *)
+ add p s 0 n;
+ Str.match_end r
+ end
+ else raise Not_found
+ with
+ Not_found ->
+ if Netstream.at_eos s then
+ failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
+ (* The regexp did not match: Add the first block of the window;
+ * and move the window.
+ *)
+ let n =
+ min
+ (Netstream.window_length s)
+ (Netstream.block_size s)
+ in
+ (* Printf.printf "add n=%d\n" n; *)
+ add p s 0 n;
+ Netstream.move s n;
+ search_next_boundary p
+ in
+
+ (* Subtask 4: Search the end of the boundary line *)
+ let rec search_end_of_line k =
+ (* Search LF beginning at position k. Discard any contents until that. *)
+ try
+ let i,r = Str.search_forward
+ lf_re
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ in
+ (* If match_end <= window_length, the search was successful.
+ * Otherwise, we searched in the uninitialized region of the
+ * buffer.
+ *)
+ if Str.match_end r <= Netstream.window_length s then begin
+ Str.match_end r
+ end
+ else raise Not_found
+ with
+ Not_found ->
+ if Netstream.at_eos s then
+ failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
+ (* The regexp did not match: move the window.
+ *)
+ let n = Netstream.window_length s in
+ Netstream.move s n;
+ search_end_of_line 0
+ in
+
+ (* Subtask 5: Check whether "--" follows the boundary on the same line *)
+ let check_whether_last_boundary k =
+ (* k: The position directly after the boundary. *)
+ Netstream.want s (k+2);
+ let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+ (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
+ in
+
+ (* Subtask 6: Check whether the buffer begins with a boundary. *)
+ let check_beginning_is_boundary () =
+ let m = String.length boundary + 2 in
+ Netstream.want s m;
+ let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+ (Netstream.window_length s >= m) &&
+ (Str.string_partial_match boundary2_re str 0 <> None)
+ in
+
+ let rec parse_part () =
+ (* The first byte of the current window of s contains the character
+ * directly following the boundary line that starts this part.
+ *)
+ (* Search the end of the MIME header: *)
+ let k_eoh = search_end_of_header 0 in
+ (* Printf.printf "k_eoh=%d\n" k_eoh; *)
+ (* Get the MIME header: *)
+ let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+ let header, k_eoh' = scan_header str 0 k_eoh in
+ assert (k_eoh = k_eoh');
+ (* Move the window over the header: *)
+ Netstream.move s k_eoh;
+ (* Create the part: *)
+ let p = create header in
+ let continue =
+ begin try
+ (* Search the next boundary; add the chunks while searching: *)
+ let k_eob = search_next_boundary p in
+ (* Printf.printf "k_eob=%d\n" k_eob; *)
+ (* Is this the last boundary? *)
+ if check_whether_last_boundary k_eob then begin
+ (* Skip the rest: *)
+ while not (Netstream.at_eos s) do
+ Netstream.move s (Netstream.window_length s)
+ done;
+ Netstream.move s (Netstream.window_length s);
+ false
+ end
+ else begin
+ (* Move to the beginning of the next line: *)
+ let k_eol = search_end_of_line k_eob in
+ Netstream.move s k_eol;
+ true
+ end
+ with
+ any ->
+ (try stop p with _ -> ());
+ raise any
+ end in
+ stop p;
+ if continue then
+ (* Continue with next part: *)
+ parse_part()
+ in
+
+ (* Check whether s directly begins with a boundary: *)
+ if check_beginning_is_boundary() then begin
+ (* Move to the beginning of the next line: *)
+ let k_eol = search_end_of_line 0 in
+ Netstream.move s k_eol;
+ (* Begin with first part: *)
+ parse_part()
+ end
+ else begin
+ (* Search the first boundary: *)
+ try
+ let k_eob = search_first_boundary() in
+ (* Printf.printf "k_eob=%d\n" k_eob; *)
+ (* Move to the beginning of the next line: *)
+ let k_eol = search_end_of_line k_eob in
+ (* Printf.printf "k_eol=%d\n" k_eol; *)
+ Netstream.move s k_eol;
+ (* Begin with first part: *)
+ parse_part()
+ with
+ Not_found ->
+ (* No boundary at all: The body is empty. *)
+ ()
+ end;
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/13 00:04:36 gerd
+ * Encoded_word -> EncodedWord
+ * Bugfixes.
+ *
+ * Revision 1.7 2000/08/07 00:25:14 gerd
+ * Implemented the new functions for structured field lexing.
+ *
+ * Revision 1.6 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.5 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.4 2000/05/16 22:30:14 gerd
+ * Added support for some types of malformed MIME messages.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)