X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fmimestring.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fmimestring.ml;h=8fc4bfcbe15ee2cb21189d895af063ac84dd5690;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/mimestring.ml b/helm/DEVEL/pxp/netstring/mimestring.ml new file mode 100644 index 000000000..8fc4bfcbe --- /dev/null +++ b/helm/DEVEL/pxp/netstring/mimestring.ml @@ -0,0 +1,1035 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +module Str = Netstring_str;; + +let cr_or_lf_re = Str.regexp "[\013\n]";; + +let trim_right_spaces_re = + Str.regexp "[ \t]+$";; + +let trim_left_spaces_re = + Str.regexp "^[ \t]+";; + +let header_re = + Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";; + +let empty_line_re = + Str.regexp "\013?\n";; + +let end_of_header_re = + Str.regexp "\n\013?\n";; + + +let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 = + let rec parse_header i l = + match Str.string_partial_match header_re parstr i with + Some r -> + let i' = Str.match_end r in + if i' > i1 then + failwith "Mimestring.scan_header"; + let name = String.lowercase(Str.matched_group r 1 parstr) in + let value_with_crlf = + Str.matched_group r 2 parstr in + let value = + if unfold then begin + let value_with_rspaces = + Str.global_replace cr_or_lf_re "" value_with_crlf in + let value_with_lspaces = + Str.global_replace trim_right_spaces_re "" value_with_rspaces in + Str.global_replace trim_left_spaces_re "" value_with_lspaces + end + else value_with_crlf + in + parse_header i' ( (name,value) :: l) + | None -> + (* The header must end with an empty line *) + begin match Str.string_partial_match empty_line_re parstr i with + Some r' -> + List.rev l, Str.match_end r' + | None -> + failwith "Mimestring.scan_header" + end + in + parse_header i0 [] +;; + +type s_token = + Atom of string + | EncodedWord of (string * string * string) + | QString of string + | Control of char + | Special of char + | DomainLiteral of string + | Comment + | End +;; + +type s_option = + No_backslash_escaping + | Return_comments + | Recognize_encoded_words +;; + +type s_extended_token = + { token : s_token; + token_pos : int; + token_line : int; + token_linepos : int; (* Position of the beginning of the line *) + token_len : int; + mutable token_sep : bool; (* separates adjacent encoded words *) + } +;; + +let get_token et = et.token;; +let get_pos et = et.token_pos;; +let get_line et = et.token_line;; +let get_column et = et.token_pos - et.token_linepos;; +let get_length et = et.token_len;; +let separates_adjacent_encoded_words et = et.token_sep;; + +let get_decoded_word et = + match et.token with + Atom s -> s + | QString s -> s + | Control c -> String.make 1 c + | Special c -> String.make 1 c + | DomainLiteral s -> s + | Comment -> "" + | EncodedWord (_, encoding, content) -> + ( match encoding with + ("Q"|"q") -> + Netencoding.Q.decode content + | ("B"|"b") -> + Netencoding.Base64.decode + ~url_variant:false + ~accept_spaces:false + content + | _ -> failwith "get_decoded_word" + ) + | End -> + failwith "get_decoded_word" +;; + +let get_charset et = + match et.token with + EncodedWord (charset, _, _) -> charset + | End -> failwith "get_charset" + | _ -> "US-ASCII" +;; + +type scanner_spec = + { (* What the user specifies: *) + scanner_specials : char list; + scanner_options : s_option list; + (* Derived from that: *) + mutable opt_no_backslash_escaping : bool; + mutable opt_return_comments : bool; + mutable opt_recognize_encoded_words : bool; + + mutable is_special : bool array; + mutable space_is_special : bool; + } +;; + +type scanner_target = + { scanned_string : string; + mutable scanner_pos : int; + mutable scanner_line : int; + mutable scanner_linepos : int; + (* Position of the beginning of the line *) + mutable scanned_tokens : s_extended_token Queue.t; + (* A queue of already scanned tokens in order to look ahead *) + mutable last_token : s_token; + (* The last returned token. It is only important whether it is + * EncodedWord or not. + *) + } +;; + +type mime_scanner = scanner_spec * scanner_target +;; + +let get_pos_of_scanner (spec, target) = + if spec.opt_recognize_encoded_words then + failwith "get_pos_of_scanner" + else + target.scanner_pos +;; + +let get_line_of_scanner (spec, target) = + if spec.opt_recognize_encoded_words then + failwith "get_line_of_scanner" + else + target.scanner_line +;; + +let get_column_of_scanner (spec, target) = + if spec.opt_recognize_encoded_words then + failwith "get_column_of_scanner" + else + target.scanner_pos - target.scanner_linepos +;; + +let create_mime_scanner ~specials ~scan_options = + let is_spcl = Array.create 256 false in + List.iter + (fun c -> is_spcl.( Char.code c ) <- true) + specials; + let spec = + { scanner_specials = specials; + scanner_options = scan_options; + opt_no_backslash_escaping = + List.mem No_backslash_escaping scan_options; + opt_return_comments = + List.mem Return_comments scan_options; + opt_recognize_encoded_words = + List.mem Recognize_encoded_words scan_options; + is_special = is_spcl; + space_is_special = is_spcl.(32); + } + in + (* Grab the remaining arguments: *) + fun ?(pos=0) ?(line=1) ?(column=0) s -> + let target = + { scanned_string = s; + scanner_pos = pos; + scanner_line = line; + scanner_linepos = pos - column; + scanned_tokens = Queue.create(); + last_token = Comment; (* Must not be initialized with EncodedWord *) + } + in + spec, target +;; + + +let encoded_word_re = + Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";; + +let scan_next_token ((spec,target) as scn) = + let mk_pair t len = + { token = t; + token_pos = target.scanner_pos; + token_line = target.scanner_line; + token_linepos = target.scanner_linepos; + token_len = len; + token_sep = false; + }, + t + in + + (* Note: mk_pair creates a new token pair, and it assumes that + * target.scanner_pos (and also scanner_line and scanner_linepos) + * still contain the position of the beginning of the token. + *) + + let s = target.scanned_string in + let l = String.length s in + let rec scan i = + if i < l then begin + let c = s.[i] in + if spec.is_special.( Char.code c ) then begin + let pair = mk_pair (Special c) 1 in + target.scanner_pos <- target.scanner_pos + 1; + (match c with + '\n' -> + target.scanner_line <- target.scanner_line + 1; + target.scanner_linepos <- target.scanner_pos; + | _ -> () + ); + pair + end + else + match c with + '"' -> + (* Quoted string: *) + scan_qstring (i+1) (i+1) 0 + | '(' -> + (* Comment: *) + let i', line, linepos = + scan_comment (i+1) 0 target.scanner_line target.scanner_linepos + in + let advance() = + target.scanner_pos <- i'; + target.scanner_line <- line; + target.scanner_linepos <- linepos + in + if spec.opt_return_comments then begin + let pair = mk_pair Comment (i' - i) in + advance(); + pair + end + else + if spec.space_is_special then begin + let pair = mk_pair (Special ' ') (i' - i) in + advance(); + pair + end + else begin + advance(); + scan i' + end + | (' '|'\t'|'\r') -> + (* Ignore whitespace by default: *) + target.scanner_pos <- target.scanner_pos + 1; + scan (i+1) + | '\n' -> + (* Ignore whitespace by default: *) + target.scanner_pos <- target.scanner_pos + 1; + target.scanner_line <- target.scanner_line + 1; + target.scanner_linepos <- target.scanner_pos; + scan (i+1) + | ('\000'..'\031'|'\127'..'\255') -> + let pair = mk_pair (Control c) 1 in + target.scanner_pos <- target.scanner_pos + 1; + pair + | '[' -> + (* Domain literal: *) + scan_dliteral (i+1) (i+1) 0 + | _ -> + scan_atom i i + end + else + mk_pair End 0 + + and scan_atom i0 i = + let return_atom() = + let astring = String.sub s i0 (i-i0) in + let r = + if spec.opt_recognize_encoded_words then + Str.string_match ~groups:4 encoded_word_re astring 0 + else + None + in + match r with + None -> + (* An atom contains never a linefeed character, so we can ignore + * scanner_line here. + *) + let pair = mk_pair (Atom astring) (i-i0) in + target.scanner_pos <- i; + pair + | Some mr -> + (* Found an encoded word. *) + let charset = Str.matched_group mr 1 astring in + let encoding = Str.matched_group mr 2 astring in + let content = Str.matched_group mr 3 astring in + let t = EncodedWord(String.uppercase charset, + String.uppercase encoding, + content) in + let pair = mk_pair t (i-i0) in + target.scanner_pos <- i; + pair + in + + if i < l then + let c = s.[i] in + match c with + ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') -> + return_atom() + | _ -> + if spec.is_special.( Char.code c ) then + return_atom() + else + scan_atom i0 (i+1) + else + return_atom() + + and scan_qstring i0 i n = + if i < l then + let c = s.[i] in + match c with + '"' -> + (* Regular end of the quoted string: *) + let content, line, linepos = copy_qstring i0 (i-1) n in + let pair = mk_pair (QString content) (i-i0+2) in + target.scanner_pos <- i+1; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + | '\\' when not spec.opt_no_backslash_escaping -> + scan_qstring i0 (i+2) (n+1) + | _ -> + scan_qstring i0 (i+1) (n+1) + else + (* Missing right double quote *) + let content, line, linepos = copy_qstring i0 (l-1) n in + let pair = mk_pair (QString content) (l-i0+1) in + target.scanner_pos <- l; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + + and copy_qstring i0 i1 n = + (* Used for quoted strings and for domain literals *) + let r = String.create n in + let k = ref 0 in + let line = ref target.scanner_line in + let linepos = ref target.scanner_linepos in + for i = i0 to i1 do + let c = s.[i] in + match c with + '\\' when i < i1 && not spec.opt_no_backslash_escaping -> () + | '\n' -> + line := !line + 1; + linepos := i+1; + r.[ !k ] <- c; + incr k + | _ -> + r.[ !k ] <- c; + incr k + done; + assert (!k = n); + r, !line, !linepos + + and scan_dliteral i0 i n = + if i < l then + let c = s.[i] in + match c with + ']' -> + (* Regular end of the domain literal: *) + let content, line, linepos = copy_qstring i0 (i-1) n in + let pair = mk_pair (DomainLiteral content) (i-i0+2) in + target.scanner_pos <- i+1; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + | '\\' when not spec.opt_no_backslash_escaping -> + scan_dliteral i0 (i+2) (n+1) + | _ -> + (* Note: '[' is not allowed by RFC 822; we treat it here as + * a regular character (questionable) + *) + scan_dliteral i0 (i+1) (n+1) + else + (* Missing right bracket *) + let content, line, linepos = copy_qstring i0 (l-1) n in + let pair = mk_pair (DomainLiteral content) (l-i0+1) in + target.scanner_pos <- l; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + + + and scan_comment i level line linepos = + if i < l then + let c = s.[i] in + match c with + ')' -> + (i+1), line, linepos + | '(' -> + (* nested comment *) + let i', line', linepos' = + scan_comment (i+1) (level+1) line linepos + in + scan_comment i' level line' linepos' + | '\\' when not spec.opt_no_backslash_escaping -> + if (i+1) < l && s.[i+1] = '\n' then + scan_comment (i+2) level (line+1) (i+2) + else + scan_comment (i+2) level line linepos + | '\n' -> + scan_comment (i+1) level (line+1) (i+1) + | _ -> + scan_comment (i+1) level line linepos + else + (* Missing closing ')' *) + i, line, linepos + in + + scan target.scanner_pos +;; + + +let scan_token ((spec,target) as scn) = + (* This function handles token queueing in order to recognize white space + * that separates adjacent encoded words. + *) + + let rec collect_whitespace () = + (* Scans whitespace tokens and returns them as: + * (ws_list, other_tok) if there is some other_tok following the + * list (other_tok = End is possible) + *) + let (et, t) as pair = scan_next_token scn in + ( match t with + (Special ' '|Special '\t'|Special '\n'|Special '\r') -> + let ws_list, tok = collect_whitespace() in + pair :: ws_list, tok + | _ -> + [], pair + ) + in + + try + (* Is there an already scanned token in the queue? *) + let et = Queue.take target.scanned_tokens in + let t = et.token in + target.last_token <- t; + et, et.token + with + Queue.Empty -> + (* If not: inspect the last token. If that token is an EncodedWord, + * the next tokens are scanned in advance to determine if there + * are spaces separating two EncodedWords. These tokens are put + * into the queue such that it is avoided that they are scanned + * twice. (The sole purpose of the queue.) + *) + match target.last_token with + EncodedWord(_,_,_) as ew -> + let ws_list, tok = collect_whitespace() in + (* If tok is an EncodedWord, too, the tokens in ws_list must + * be flagged as separating two adjacent encoded words. + *) + ( match tok with + _, EncodedWord(_,_,_) -> + List.iter + (fun (et,t) -> + et.token_sep <- true) + ws_list + | _ -> + () + ); + (* Anyway, queue the read tokens but the first up *) + ( match ws_list with + [] -> + (* Nothing to queue *) + let et, t = tok in + target.last_token <- t; + tok + | (et,t) as pair :: ws_list' -> + List.iter + (fun (et',_) -> + Queue.add et' target.scanned_tokens) + ws_list'; + ( match tok with + | _, End -> + () + | (et',_) -> + Queue.add et' target.scanned_tokens + ); + (* Return the first scanned token *) + target.last_token <- t; + pair + ) + | _ -> + (* Regular case: Scan one token; do not queue it up *) + let (et, t) as pair = scan_next_token scn in + target.last_token <- t; + pair +;; + + +let scan_token_list scn = + let rec collect() = + match scan_token scn with + _, End -> + [] + | pair -> + pair :: collect() + in + collect() +;; + + +let scan_structured_value s specials options = + let rec collect scn = + match scan_token scn with + _, End -> + [] + | _, t -> + t :: collect scn + in + let scn = create_mime_scanner specials options s in + collect scn +;; + + +let specials_rfc822 = + [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];; + + +let specials_rfc2045 = + [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];; + + +let scan_encoded_text_value s = + let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in + let options = [ Recognize_encoded_words ] in + let scn = create_mime_scanner specials options s in + + let rec collect () = + match scan_token scn with + _, End -> + [] + | et, _ when separates_adjacent_encoded_words et -> + collect() + | et, (Special _|Atom _|EncodedWord(_,_,_)) -> + et :: collect () + | _, _ -> + assert false + in + collect() +;; + + +let scan_value_with_parameters s options = + let rec parse_params tl = + match tl with + Atom n :: Special '=' :: Atom v :: tl' -> + (n,v) :: parse_rest tl' + | Atom n :: Special '=' :: QString v :: tl' -> + (n,v) :: parse_rest tl' + | _ -> + failwith "Mimestring.scan_value_with_parameters" + and parse_rest tl = + match tl with + [] -> [] + | Special ';' :: tl' -> + parse_params tl' + | _ -> + failwith "Mimestring.scan_value_with_parameters" + in + + (* Note: Even if not used here, the comma is a very common separator + * and should be recognized as being special. You will get a + * failure if there is a comma in the scanned string. + *) + let tl = scan_structured_value s [ ';'; '='; ',' ] options in + match tl with + [ Atom n ] -> n, [] + | [ QString n ] -> n, [] + | Atom n :: Special ';' :: tl' -> + n, parse_params tl' + | QString n :: Special ';' :: tl' -> + n, parse_params tl' + | _ -> + failwith "Mimestring.scan_value_with_parameters" +;; + + +let scan_mime_type s options = + let n, params = scan_value_with_parameters s options in + (String.lowercase n), + (List.map (fun (n,v) -> (String.lowercase n, v)) params) +;; + + +let lf_re = Str.regexp "[\n]";; + +let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary = + let l_s = String.length s in + if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then + invalid_arg "Mimestring.scan_multipart_body"; + + (* First compile the regexps scanning for 'boundary': *) + let boundary1_re = + Str.regexp ("\n--" ^ Str.quote boundary) in + let boundary2_re = + Str.regexp ("--" ^ Str.quote boundary) in + + let rec parse i = + (* i: Beginning of the current part (position directly after the + * boundary line + *) + (* Search for next boundary at position i *) + let i' = + try min (fst (Str.search_forward boundary1_re s i) + 1) i1 + with + Not_found -> i1 + in + (* i': Either the position of the first '-' of the boundary line, + * or i1 if no boundary has been found + *) + if i' >= i1 then + [] (* Ignore everything after the last boundary *) + else + let i'' = + try min (fst (Str.search_forward lf_re s i') + 1) i1 + with + Not_found -> i1 + in + (* i'': The position after the boundary line *) +(* + print_int i; print_newline(); + print_int i'; print_newline(); + print_int i''; print_newline(); + flush stdout; +*) + let header, k = scan_header s i i' in + (* header: the header of the part + * k: beginning of the body + *) + + let value = + (* We know that i'-1 is a linefeed character. i'-2 should be a CR + * character. Both characters are not part of the value. + *) + if i' >= 2 then + match s.[i'-2] with + '\013' -> String.sub s k (i'-2-k) + | _ -> String.sub s k (i'-1-k) + else + String.sub s k (i'-1-k) + in + + let pair = + (header, value) in + + if i'' >= i1 + then + [ pair ] + else + pair :: parse i'' + in + + (* Find the first boundary. This is a special case, because it may be + * right at the beginning of the string (no preceding CRLF) + *) + + let i_bnd = + if Str.string_partial_match boundary2_re s i0 <> None then + i0 + else + try min (fst (Str.search_forward boundary1_re s i0)) i1 + with + Not_found -> i1 + in + + if i_bnd >= i1 then + [] + else + let i_bnd' = + try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1 + with + Not_found -> i1 + in + if i_bnd' >= i1 then + [] + else + parse i_bnd' +;; + + +let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary = + let parts = scan_multipart_body s i0 i1 boundary in + List.map + (fun (params, value) -> + let encoding = + try List.assoc "content-transfer-encoding" params + with Not_found -> "7bit" + in + + (* NOTE: In the case of "base64" and "quoted-printable", the allocation + * of the string "value" could be avoided. + *) + + let value' = + match encoding with + ("7bit"|"8bit"|"binary") -> value + | "base64" -> + Netencoding.Base64.decode_substring + value 0 (String.length value) false true + | "quoted-printable" -> + Netencoding.QuotedPrintable.decode_substring + value 0 (String.length value) + | _ -> + failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding" + in + (params, value') + ) + parts +;; + + +let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop = + + (* The block size of s must be at least the length of the boundary + 3. + * Otherwise it is not guaranteed that the boundary is always recognized. + *) + if Netstream.block_size s < String.length boundary + 3 then + invalid_arg "Mimestring.scan_multipart_body_from_netstream"; + + (* First compile the regexps scanning for 'boundary': *) + let boundary1_re = + Str.regexp ("\n--" ^ Str.quote boundary) in + let boundary2_re = + Str.regexp ("--" ^ Str.quote boundary) in + + (* Subtask 1: Search the end of the MIME header: CR LF CR LF + * (or LF LF). Enlarge the window until the complete header + * is covered by the window. + *) + let rec search_end_of_header k = + (* Search the end of the header beginning at position k of the + * current window. + * Return the position of the first character of the body. + *) + try + (* Search for LF CR? LF: *) + let i, r = Str.search_forward + end_of_header_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + k + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then + Str.match_end r + else + raise Not_found + with + Not_found -> + (* If the end of the stream is reached, the end of the header + * is missing: Error. + * Otherwise, we try to read another block, and continue. + *) + if Netstream.at_eos s then + failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream"; + let w0 = Netstream.window_length s in + Netstream.want_another_block s; + search_end_of_header (max (w0 - 2) 0) + in + + (* Subtask 2: Search the first boundary line. *) + let rec search_first_boundary() = + (* Search boundary per regexp; return the position of the character + * immediately following the boundary (on the same line), or + * raise Not_found. + *) + try + (* Search boundary per regexp: *) + let i, r = Str.search_forward + boundary1_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + 0 + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then begin + Str.match_end r + end + else raise Not_found + with + Not_found -> + if Netstream.at_eos s then raise Not_found; + (* The regexp did not match: Move the window by one block. + *) + let n = + min + (Netstream.window_length s) + (Netstream.block_size s) + in + Netstream.move s n; + search_first_boundary() + in + + (* Subtask 3: Search the next boundary line. Invoke 'add' for every + * read chunk + *) + let rec search_next_boundary p = + (* Returns the position directly after the boundary on the same line *) + try + (* Search boundary per regexp: *) + let i,r = Str.search_forward + boundary1_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + 0 + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then begin + (* Add the last chunk of the part. *) + let n = + (* i is a LF. i - 1 should be CR. Ignore these characters. *) + if i >= 1 then + match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with + '\013' -> i - 1 + | _ -> i + else + i + in + (* Printf.printf "add n=%d\n" n; *) + add p s 0 n; + Str.match_end r + end + else raise Not_found + with + Not_found -> + if Netstream.at_eos s then + failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found"; + (* The regexp did not match: Add the first block of the window; + * and move the window. + *) + let n = + min + (Netstream.window_length s) + (Netstream.block_size s) + in + (* Printf.printf "add n=%d\n" n; *) + add p s 0 n; + Netstream.move s n; + search_next_boundary p + in + + (* Subtask 4: Search the end of the boundary line *) + let rec search_end_of_line k = + (* Search LF beginning at position k. Discard any contents until that. *) + try + let i,r = Str.search_forward + lf_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + k + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then begin + Str.match_end r + end + else raise Not_found + with + Not_found -> + if Netstream.at_eos s then + failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end"; + (* The regexp did not match: move the window. + *) + let n = Netstream.window_length s in + Netstream.move s n; + search_end_of_line 0 + in + + (* Subtask 5: Check whether "--" follows the boundary on the same line *) + let check_whether_last_boundary k = + (* k: The position directly after the boundary. *) + Netstream.want s (k+2); + let str = Netbuffer.unsafe_buffer (Netstream.window s) in + (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-' + in + + (* Subtask 6: Check whether the buffer begins with a boundary. *) + let check_beginning_is_boundary () = + let m = String.length boundary + 2 in + Netstream.want s m; + let str = Netbuffer.unsafe_buffer (Netstream.window s) in + (Netstream.window_length s >= m) && + (Str.string_partial_match boundary2_re str 0 <> None) + in + + let rec parse_part () = + (* The first byte of the current window of s contains the character + * directly following the boundary line that starts this part. + *) + (* Search the end of the MIME header: *) + let k_eoh = search_end_of_header 0 in + (* Printf.printf "k_eoh=%d\n" k_eoh; *) + (* Get the MIME header: *) + let str = Netbuffer.unsafe_buffer (Netstream.window s) in + let header, k_eoh' = scan_header str 0 k_eoh in + assert (k_eoh = k_eoh'); + (* Move the window over the header: *) + Netstream.move s k_eoh; + (* Create the part: *) + let p = create header in + let continue = + begin try + (* Search the next boundary; add the chunks while searching: *) + let k_eob = search_next_boundary p in + (* Printf.printf "k_eob=%d\n" k_eob; *) + (* Is this the last boundary? *) + if check_whether_last_boundary k_eob then begin + (* Skip the rest: *) + while not (Netstream.at_eos s) do + Netstream.move s (Netstream.window_length s) + done; + Netstream.move s (Netstream.window_length s); + false + end + else begin + (* Move to the beginning of the next line: *) + let k_eol = search_end_of_line k_eob in + Netstream.move s k_eol; + true + end + with + any -> + (try stop p with _ -> ()); + raise any + end in + stop p; + if continue then + (* Continue with next part: *) + parse_part() + in + + (* Check whether s directly begins with a boundary: *) + if check_beginning_is_boundary() then begin + (* Move to the beginning of the next line: *) + let k_eol = search_end_of_line 0 in + Netstream.move s k_eol; + (* Begin with first part: *) + parse_part() + end + else begin + (* Search the first boundary: *) + try + let k_eob = search_first_boundary() in + (* Printf.printf "k_eob=%d\n" k_eob; *) + (* Move to the beginning of the next line: *) + let k_eol = search_end_of_line k_eob in + (* Printf.printf "k_eol=%d\n" k_eol; *) + Netstream.move s k_eol; + (* Begin with first part: *) + parse_part() + with + Not_found -> + (* No boundary at all: The body is empty. *) + () + end; +;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.8 2000/08/13 00:04:36 gerd + * Encoded_word -> EncodedWord + * Bugfixes. + * + * Revision 1.7 2000/08/07 00:25:14 gerd + * Implemented the new functions for structured field lexing. + * + * Revision 1.6 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.5 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.4 2000/05/16 22:30:14 gerd + * Added support for some types of malformed MIME messages. + * + * Revision 1.3 2000/04/15 13:09:01 gerd + * Implemented uploads to temporary files. + * + * Revision 1.2 2000/03/02 01:15:30 gerd + * Updated. + * + * Revision 1.1 2000/02/25 15:21:12 gerd + * Initial revision. + * + * + *)