X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fmimestring.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fmimestring.ml;h=0000000000000000000000000000000000000000;hb=c7514aaa249a96c5fdd39b1123fbdb38d92f20b6;hp=8fc4bfcbe15ee2cb21189d895af063ac84dd5690;hpb=1c7fb836e2af4f2f3d18afd0396701f2094265ff;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/mimestring.ml b/helm/DEVEL/pxp/netstring/mimestring.ml deleted file mode 100644 index 8fc4bfcbe..000000000 --- a/helm/DEVEL/pxp/netstring/mimestring.ml +++ /dev/null @@ -1,1035 +0,0 @@ -(* $Id$ - * ---------------------------------------------------------------------- - * - *) - -module Str = Netstring_str;; - -let cr_or_lf_re = Str.regexp "[\013\n]";; - -let trim_right_spaces_re = - Str.regexp "[ \t]+$";; - -let trim_left_spaces_re = - Str.regexp "^[ \t]+";; - -let header_re = - Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";; - -let empty_line_re = - Str.regexp "\013?\n";; - -let end_of_header_re = - Str.regexp "\n\013?\n";; - - -let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 = - let rec parse_header i l = - match Str.string_partial_match header_re parstr i with - Some r -> - let i' = Str.match_end r in - if i' > i1 then - failwith "Mimestring.scan_header"; - let name = String.lowercase(Str.matched_group r 1 parstr) in - let value_with_crlf = - Str.matched_group r 2 parstr in - let value = - if unfold then begin - let value_with_rspaces = - Str.global_replace cr_or_lf_re "" value_with_crlf in - let value_with_lspaces = - Str.global_replace trim_right_spaces_re "" value_with_rspaces in - Str.global_replace trim_left_spaces_re "" value_with_lspaces - end - else value_with_crlf - in - parse_header i' ( (name,value) :: l) - | None -> - (* The header must end with an empty line *) - begin match Str.string_partial_match empty_line_re parstr i with - Some r' -> - List.rev l, Str.match_end r' - | None -> - failwith "Mimestring.scan_header" - end - in - parse_header i0 [] -;; - -type s_token = - Atom of string - | EncodedWord of (string * string * string) - | QString of string - | Control of char - | Special of char - | DomainLiteral of string - | Comment - | End -;; - -type s_option = - No_backslash_escaping - | Return_comments - | Recognize_encoded_words -;; - -type s_extended_token = - { token : s_token; - token_pos : int; - token_line : int; - token_linepos : int; (* Position of the beginning of the line *) - token_len : int; - mutable token_sep : bool; (* separates adjacent encoded words *) - } -;; - -let get_token et = et.token;; -let get_pos et = et.token_pos;; -let get_line et = et.token_line;; -let get_column et = et.token_pos - et.token_linepos;; -let get_length et = et.token_len;; -let separates_adjacent_encoded_words et = et.token_sep;; - -let get_decoded_word et = - match et.token with - Atom s -> s - | QString s -> s - | Control c -> String.make 1 c - | Special c -> String.make 1 c - | DomainLiteral s -> s - | Comment -> "" - | EncodedWord (_, encoding, content) -> - ( match encoding with - ("Q"|"q") -> - Netencoding.Q.decode content - | ("B"|"b") -> - Netencoding.Base64.decode - ~url_variant:false - ~accept_spaces:false - content - | _ -> failwith "get_decoded_word" - ) - | End -> - failwith "get_decoded_word" -;; - -let get_charset et = - match et.token with - EncodedWord (charset, _, _) -> charset - | End -> failwith "get_charset" - | _ -> "US-ASCII" -;; - -type scanner_spec = - { (* What the user specifies: *) - scanner_specials : char list; - scanner_options : s_option list; - (* Derived from that: *) - mutable opt_no_backslash_escaping : bool; - mutable opt_return_comments : bool; - mutable opt_recognize_encoded_words : bool; - - mutable is_special : bool array; - mutable space_is_special : bool; - } -;; - -type scanner_target = - { scanned_string : string; - mutable scanner_pos : int; - mutable scanner_line : int; - mutable scanner_linepos : int; - (* Position of the beginning of the line *) - mutable scanned_tokens : s_extended_token Queue.t; - (* A queue of already scanned tokens in order to look ahead *) - mutable last_token : s_token; - (* The last returned token. It is only important whether it is - * EncodedWord or not. - *) - } -;; - -type mime_scanner = scanner_spec * scanner_target -;; - -let get_pos_of_scanner (spec, target) = - if spec.opt_recognize_encoded_words then - failwith "get_pos_of_scanner" - else - target.scanner_pos -;; - -let get_line_of_scanner (spec, target) = - if spec.opt_recognize_encoded_words then - failwith "get_line_of_scanner" - else - target.scanner_line -;; - -let get_column_of_scanner (spec, target) = - if spec.opt_recognize_encoded_words then - failwith "get_column_of_scanner" - else - target.scanner_pos - target.scanner_linepos -;; - -let create_mime_scanner ~specials ~scan_options = - let is_spcl = Array.create 256 false in - List.iter - (fun c -> is_spcl.( Char.code c ) <- true) - specials; - let spec = - { scanner_specials = specials; - scanner_options = scan_options; - opt_no_backslash_escaping = - List.mem No_backslash_escaping scan_options; - opt_return_comments = - List.mem Return_comments scan_options; - opt_recognize_encoded_words = - List.mem Recognize_encoded_words scan_options; - is_special = is_spcl; - space_is_special = is_spcl.(32); - } - in - (* Grab the remaining arguments: *) - fun ?(pos=0) ?(line=1) ?(column=0) s -> - let target = - { scanned_string = s; - scanner_pos = pos; - scanner_line = line; - scanner_linepos = pos - column; - scanned_tokens = Queue.create(); - last_token = Comment; (* Must not be initialized with EncodedWord *) - } - in - spec, target -;; - - -let encoded_word_re = - Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";; - -let scan_next_token ((spec,target) as scn) = - let mk_pair t len = - { token = t; - token_pos = target.scanner_pos; - token_line = target.scanner_line; - token_linepos = target.scanner_linepos; - token_len = len; - token_sep = false; - }, - t - in - - (* Note: mk_pair creates a new token pair, and it assumes that - * target.scanner_pos (and also scanner_line and scanner_linepos) - * still contain the position of the beginning of the token. - *) - - let s = target.scanned_string in - let l = String.length s in - let rec scan i = - if i < l then begin - let c = s.[i] in - if spec.is_special.( Char.code c ) then begin - let pair = mk_pair (Special c) 1 in - target.scanner_pos <- target.scanner_pos + 1; - (match c with - '\n' -> - target.scanner_line <- target.scanner_line + 1; - target.scanner_linepos <- target.scanner_pos; - | _ -> () - ); - pair - end - else - match c with - '"' -> - (* Quoted string: *) - scan_qstring (i+1) (i+1) 0 - | '(' -> - (* Comment: *) - let i', line, linepos = - scan_comment (i+1) 0 target.scanner_line target.scanner_linepos - in - let advance() = - target.scanner_pos <- i'; - target.scanner_line <- line; - target.scanner_linepos <- linepos - in - if spec.opt_return_comments then begin - let pair = mk_pair Comment (i' - i) in - advance(); - pair - end - else - if spec.space_is_special then begin - let pair = mk_pair (Special ' ') (i' - i) in - advance(); - pair - end - else begin - advance(); - scan i' - end - | (' '|'\t'|'\r') -> - (* Ignore whitespace by default: *) - target.scanner_pos <- target.scanner_pos + 1; - scan (i+1) - | '\n' -> - (* Ignore whitespace by default: *) - target.scanner_pos <- target.scanner_pos + 1; - target.scanner_line <- target.scanner_line + 1; - target.scanner_linepos <- target.scanner_pos; - scan (i+1) - | ('\000'..'\031'|'\127'..'\255') -> - let pair = mk_pair (Control c) 1 in - target.scanner_pos <- target.scanner_pos + 1; - pair - | '[' -> - (* Domain literal: *) - scan_dliteral (i+1) (i+1) 0 - | _ -> - scan_atom i i - end - else - mk_pair End 0 - - and scan_atom i0 i = - let return_atom() = - let astring = String.sub s i0 (i-i0) in - let r = - if spec.opt_recognize_encoded_words then - Str.string_match ~groups:4 encoded_word_re astring 0 - else - None - in - match r with - None -> - (* An atom contains never a linefeed character, so we can ignore - * scanner_line here. - *) - let pair = mk_pair (Atom astring) (i-i0) in - target.scanner_pos <- i; - pair - | Some mr -> - (* Found an encoded word. *) - let charset = Str.matched_group mr 1 astring in - let encoding = Str.matched_group mr 2 astring in - let content = Str.matched_group mr 3 astring in - let t = EncodedWord(String.uppercase charset, - String.uppercase encoding, - content) in - let pair = mk_pair t (i-i0) in - target.scanner_pos <- i; - pair - in - - if i < l then - let c = s.[i] in - match c with - ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') -> - return_atom() - | _ -> - if spec.is_special.( Char.code c ) then - return_atom() - else - scan_atom i0 (i+1) - else - return_atom() - - and scan_qstring i0 i n = - if i < l then - let c = s.[i] in - match c with - '"' -> - (* Regular end of the quoted string: *) - let content, line, linepos = copy_qstring i0 (i-1) n in - let pair = mk_pair (QString content) (i-i0+2) in - target.scanner_pos <- i+1; - target.scanner_line <- line; - target.scanner_linepos <- linepos; - pair - | '\\' when not spec.opt_no_backslash_escaping -> - scan_qstring i0 (i+2) (n+1) - | _ -> - scan_qstring i0 (i+1) (n+1) - else - (* Missing right double quote *) - let content, line, linepos = copy_qstring i0 (l-1) n in - let pair = mk_pair (QString content) (l-i0+1) in - target.scanner_pos <- l; - target.scanner_line <- line; - target.scanner_linepos <- linepos; - pair - - and copy_qstring i0 i1 n = - (* Used for quoted strings and for domain literals *) - let r = String.create n in - let k = ref 0 in - let line = ref target.scanner_line in - let linepos = ref target.scanner_linepos in - for i = i0 to i1 do - let c = s.[i] in - match c with - '\\' when i < i1 && not spec.opt_no_backslash_escaping -> () - | '\n' -> - line := !line + 1; - linepos := i+1; - r.[ !k ] <- c; - incr k - | _ -> - r.[ !k ] <- c; - incr k - done; - assert (!k = n); - r, !line, !linepos - - and scan_dliteral i0 i n = - if i < l then - let c = s.[i] in - match c with - ']' -> - (* Regular end of the domain literal: *) - let content, line, linepos = copy_qstring i0 (i-1) n in - let pair = mk_pair (DomainLiteral content) (i-i0+2) in - target.scanner_pos <- i+1; - target.scanner_line <- line; - target.scanner_linepos <- linepos; - pair - | '\\' when not spec.opt_no_backslash_escaping -> - scan_dliteral i0 (i+2) (n+1) - | _ -> - (* Note: '[' is not allowed by RFC 822; we treat it here as - * a regular character (questionable) - *) - scan_dliteral i0 (i+1) (n+1) - else - (* Missing right bracket *) - let content, line, linepos = copy_qstring i0 (l-1) n in - let pair = mk_pair (DomainLiteral content) (l-i0+1) in - target.scanner_pos <- l; - target.scanner_line <- line; - target.scanner_linepos <- linepos; - pair - - - and scan_comment i level line linepos = - if i < l then - let c = s.[i] in - match c with - ')' -> - (i+1), line, linepos - | '(' -> - (* nested comment *) - let i', line', linepos' = - scan_comment (i+1) (level+1) line linepos - in - scan_comment i' level line' linepos' - | '\\' when not spec.opt_no_backslash_escaping -> - if (i+1) < l && s.[i+1] = '\n' then - scan_comment (i+2) level (line+1) (i+2) - else - scan_comment (i+2) level line linepos - | '\n' -> - scan_comment (i+1) level (line+1) (i+1) - | _ -> - scan_comment (i+1) level line linepos - else - (* Missing closing ')' *) - i, line, linepos - in - - scan target.scanner_pos -;; - - -let scan_token ((spec,target) as scn) = - (* This function handles token queueing in order to recognize white space - * that separates adjacent encoded words. - *) - - let rec collect_whitespace () = - (* Scans whitespace tokens and returns them as: - * (ws_list, other_tok) if there is some other_tok following the - * list (other_tok = End is possible) - *) - let (et, t) as pair = scan_next_token scn in - ( match t with - (Special ' '|Special '\t'|Special '\n'|Special '\r') -> - let ws_list, tok = collect_whitespace() in - pair :: ws_list, tok - | _ -> - [], pair - ) - in - - try - (* Is there an already scanned token in the queue? *) - let et = Queue.take target.scanned_tokens in - let t = et.token in - target.last_token <- t; - et, et.token - with - Queue.Empty -> - (* If not: inspect the last token. If that token is an EncodedWord, - * the next tokens are scanned in advance to determine if there - * are spaces separating two EncodedWords. These tokens are put - * into the queue such that it is avoided that they are scanned - * twice. (The sole purpose of the queue.) - *) - match target.last_token with - EncodedWord(_,_,_) as ew -> - let ws_list, tok = collect_whitespace() in - (* If tok is an EncodedWord, too, the tokens in ws_list must - * be flagged as separating two adjacent encoded words. - *) - ( match tok with - _, EncodedWord(_,_,_) -> - List.iter - (fun (et,t) -> - et.token_sep <- true) - ws_list - | _ -> - () - ); - (* Anyway, queue the read tokens but the first up *) - ( match ws_list with - [] -> - (* Nothing to queue *) - let et, t = tok in - target.last_token <- t; - tok - | (et,t) as pair :: ws_list' -> - List.iter - (fun (et',_) -> - Queue.add et' target.scanned_tokens) - ws_list'; - ( match tok with - | _, End -> - () - | (et',_) -> - Queue.add et' target.scanned_tokens - ); - (* Return the first scanned token *) - target.last_token <- t; - pair - ) - | _ -> - (* Regular case: Scan one token; do not queue it up *) - let (et, t) as pair = scan_next_token scn in - target.last_token <- t; - pair -;; - - -let scan_token_list scn = - let rec collect() = - match scan_token scn with - _, End -> - [] - | pair -> - pair :: collect() - in - collect() -;; - - -let scan_structured_value s specials options = - let rec collect scn = - match scan_token scn with - _, End -> - [] - | _, t -> - t :: collect scn - in - let scn = create_mime_scanner specials options s in - collect scn -;; - - -let specials_rfc822 = - [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];; - - -let specials_rfc2045 = - [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];; - - -let scan_encoded_text_value s = - let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in - let options = [ Recognize_encoded_words ] in - let scn = create_mime_scanner specials options s in - - let rec collect () = - match scan_token scn with - _, End -> - [] - | et, _ when separates_adjacent_encoded_words et -> - collect() - | et, (Special _|Atom _|EncodedWord(_,_,_)) -> - et :: collect () - | _, _ -> - assert false - in - collect() -;; - - -let scan_value_with_parameters s options = - let rec parse_params tl = - match tl with - Atom n :: Special '=' :: Atom v :: tl' -> - (n,v) :: parse_rest tl' - | Atom n :: Special '=' :: QString v :: tl' -> - (n,v) :: parse_rest tl' - | _ -> - failwith "Mimestring.scan_value_with_parameters" - and parse_rest tl = - match tl with - [] -> [] - | Special ';' :: tl' -> - parse_params tl' - | _ -> - failwith "Mimestring.scan_value_with_parameters" - in - - (* Note: Even if not used here, the comma is a very common separator - * and should be recognized as being special. You will get a - * failure if there is a comma in the scanned string. - *) - let tl = scan_structured_value s [ ';'; '='; ',' ] options in - match tl with - [ Atom n ] -> n, [] - | [ QString n ] -> n, [] - | Atom n :: Special ';' :: tl' -> - n, parse_params tl' - | QString n :: Special ';' :: tl' -> - n, parse_params tl' - | _ -> - failwith "Mimestring.scan_value_with_parameters" -;; - - -let scan_mime_type s options = - let n, params = scan_value_with_parameters s options in - (String.lowercase n), - (List.map (fun (n,v) -> (String.lowercase n, v)) params) -;; - - -let lf_re = Str.regexp "[\n]";; - -let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary = - let l_s = String.length s in - if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then - invalid_arg "Mimestring.scan_multipart_body"; - - (* First compile the regexps scanning for 'boundary': *) - let boundary1_re = - Str.regexp ("\n--" ^ Str.quote boundary) in - let boundary2_re = - Str.regexp ("--" ^ Str.quote boundary) in - - let rec parse i = - (* i: Beginning of the current part (position directly after the - * boundary line - *) - (* Search for next boundary at position i *) - let i' = - try min (fst (Str.search_forward boundary1_re s i) + 1) i1 - with - Not_found -> i1 - in - (* i': Either the position of the first '-' of the boundary line, - * or i1 if no boundary has been found - *) - if i' >= i1 then - [] (* Ignore everything after the last boundary *) - else - let i'' = - try min (fst (Str.search_forward lf_re s i') + 1) i1 - with - Not_found -> i1 - in - (* i'': The position after the boundary line *) -(* - print_int i; print_newline(); - print_int i'; print_newline(); - print_int i''; print_newline(); - flush stdout; -*) - let header, k = scan_header s i i' in - (* header: the header of the part - * k: beginning of the body - *) - - let value = - (* We know that i'-1 is a linefeed character. i'-2 should be a CR - * character. Both characters are not part of the value. - *) - if i' >= 2 then - match s.[i'-2] with - '\013' -> String.sub s k (i'-2-k) - | _ -> String.sub s k (i'-1-k) - else - String.sub s k (i'-1-k) - in - - let pair = - (header, value) in - - if i'' >= i1 - then - [ pair ] - else - pair :: parse i'' - in - - (* Find the first boundary. This is a special case, because it may be - * right at the beginning of the string (no preceding CRLF) - *) - - let i_bnd = - if Str.string_partial_match boundary2_re s i0 <> None then - i0 - else - try min (fst (Str.search_forward boundary1_re s i0)) i1 - with - Not_found -> i1 - in - - if i_bnd >= i1 then - [] - else - let i_bnd' = - try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1 - with - Not_found -> i1 - in - if i_bnd' >= i1 then - [] - else - parse i_bnd' -;; - - -let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary = - let parts = scan_multipart_body s i0 i1 boundary in - List.map - (fun (params, value) -> - let encoding = - try List.assoc "content-transfer-encoding" params - with Not_found -> "7bit" - in - - (* NOTE: In the case of "base64" and "quoted-printable", the allocation - * of the string "value" could be avoided. - *) - - let value' = - match encoding with - ("7bit"|"8bit"|"binary") -> value - | "base64" -> - Netencoding.Base64.decode_substring - value 0 (String.length value) false true - | "quoted-printable" -> - Netencoding.QuotedPrintable.decode_substring - value 0 (String.length value) - | _ -> - failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding" - in - (params, value') - ) - parts -;; - - -let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop = - - (* The block size of s must be at least the length of the boundary + 3. - * Otherwise it is not guaranteed that the boundary is always recognized. - *) - if Netstream.block_size s < String.length boundary + 3 then - invalid_arg "Mimestring.scan_multipart_body_from_netstream"; - - (* First compile the regexps scanning for 'boundary': *) - let boundary1_re = - Str.regexp ("\n--" ^ Str.quote boundary) in - let boundary2_re = - Str.regexp ("--" ^ Str.quote boundary) in - - (* Subtask 1: Search the end of the MIME header: CR LF CR LF - * (or LF LF). Enlarge the window until the complete header - * is covered by the window. - *) - let rec search_end_of_header k = - (* Search the end of the header beginning at position k of the - * current window. - * Return the position of the first character of the body. - *) - try - (* Search for LF CR? LF: *) - let i, r = Str.search_forward - end_of_header_re - (Netbuffer.unsafe_buffer (Netstream.window s)) - k - in - (* If match_end <= window_length, the search was successful. - * Otherwise, we searched in the uninitialized region of the - * buffer. - *) - if Str.match_end r <= Netstream.window_length s then - Str.match_end r - else - raise Not_found - with - Not_found -> - (* If the end of the stream is reached, the end of the header - * is missing: Error. - * Otherwise, we try to read another block, and continue. - *) - if Netstream.at_eos s then - failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream"; - let w0 = Netstream.window_length s in - Netstream.want_another_block s; - search_end_of_header (max (w0 - 2) 0) - in - - (* Subtask 2: Search the first boundary line. *) - let rec search_first_boundary() = - (* Search boundary per regexp; return the position of the character - * immediately following the boundary (on the same line), or - * raise Not_found. - *) - try - (* Search boundary per regexp: *) - let i, r = Str.search_forward - boundary1_re - (Netbuffer.unsafe_buffer (Netstream.window s)) - 0 - in - (* If match_end <= window_length, the search was successful. - * Otherwise, we searched in the uninitialized region of the - * buffer. - *) - if Str.match_end r <= Netstream.window_length s then begin - Str.match_end r - end - else raise Not_found - with - Not_found -> - if Netstream.at_eos s then raise Not_found; - (* The regexp did not match: Move the window by one block. - *) - let n = - min - (Netstream.window_length s) - (Netstream.block_size s) - in - Netstream.move s n; - search_first_boundary() - in - - (* Subtask 3: Search the next boundary line. Invoke 'add' for every - * read chunk - *) - let rec search_next_boundary p = - (* Returns the position directly after the boundary on the same line *) - try - (* Search boundary per regexp: *) - let i,r = Str.search_forward - boundary1_re - (Netbuffer.unsafe_buffer (Netstream.window s)) - 0 - in - (* If match_end <= window_length, the search was successful. - * Otherwise, we searched in the uninitialized region of the - * buffer. - *) - if Str.match_end r <= Netstream.window_length s then begin - (* Add the last chunk of the part. *) - let n = - (* i is a LF. i - 1 should be CR. Ignore these characters. *) - if i >= 1 then - match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with - '\013' -> i - 1 - | _ -> i - else - i - in - (* Printf.printf "add n=%d\n" n; *) - add p s 0 n; - Str.match_end r - end - else raise Not_found - with - Not_found -> - if Netstream.at_eos s then - failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found"; - (* The regexp did not match: Add the first block of the window; - * and move the window. - *) - let n = - min - (Netstream.window_length s) - (Netstream.block_size s) - in - (* Printf.printf "add n=%d\n" n; *) - add p s 0 n; - Netstream.move s n; - search_next_boundary p - in - - (* Subtask 4: Search the end of the boundary line *) - let rec search_end_of_line k = - (* Search LF beginning at position k. Discard any contents until that. *) - try - let i,r = Str.search_forward - lf_re - (Netbuffer.unsafe_buffer (Netstream.window s)) - k - in - (* If match_end <= window_length, the search was successful. - * Otherwise, we searched in the uninitialized region of the - * buffer. - *) - if Str.match_end r <= Netstream.window_length s then begin - Str.match_end r - end - else raise Not_found - with - Not_found -> - if Netstream.at_eos s then - failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end"; - (* The regexp did not match: move the window. - *) - let n = Netstream.window_length s in - Netstream.move s n; - search_end_of_line 0 - in - - (* Subtask 5: Check whether "--" follows the boundary on the same line *) - let check_whether_last_boundary k = - (* k: The position directly after the boundary. *) - Netstream.want s (k+2); - let str = Netbuffer.unsafe_buffer (Netstream.window s) in - (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-' - in - - (* Subtask 6: Check whether the buffer begins with a boundary. *) - let check_beginning_is_boundary () = - let m = String.length boundary + 2 in - Netstream.want s m; - let str = Netbuffer.unsafe_buffer (Netstream.window s) in - (Netstream.window_length s >= m) && - (Str.string_partial_match boundary2_re str 0 <> None) - in - - let rec parse_part () = - (* The first byte of the current window of s contains the character - * directly following the boundary line that starts this part. - *) - (* Search the end of the MIME header: *) - let k_eoh = search_end_of_header 0 in - (* Printf.printf "k_eoh=%d\n" k_eoh; *) - (* Get the MIME header: *) - let str = Netbuffer.unsafe_buffer (Netstream.window s) in - let header, k_eoh' = scan_header str 0 k_eoh in - assert (k_eoh = k_eoh'); - (* Move the window over the header: *) - Netstream.move s k_eoh; - (* Create the part: *) - let p = create header in - let continue = - begin try - (* Search the next boundary; add the chunks while searching: *) - let k_eob = search_next_boundary p in - (* Printf.printf "k_eob=%d\n" k_eob; *) - (* Is this the last boundary? *) - if check_whether_last_boundary k_eob then begin - (* Skip the rest: *) - while not (Netstream.at_eos s) do - Netstream.move s (Netstream.window_length s) - done; - Netstream.move s (Netstream.window_length s); - false - end - else begin - (* Move to the beginning of the next line: *) - let k_eol = search_end_of_line k_eob in - Netstream.move s k_eol; - true - end - with - any -> - (try stop p with _ -> ()); - raise any - end in - stop p; - if continue then - (* Continue with next part: *) - parse_part() - in - - (* Check whether s directly begins with a boundary: *) - if check_beginning_is_boundary() then begin - (* Move to the beginning of the next line: *) - let k_eol = search_end_of_line 0 in - Netstream.move s k_eol; - (* Begin with first part: *) - parse_part() - end - else begin - (* Search the first boundary: *) - try - let k_eob = search_first_boundary() in - (* Printf.printf "k_eob=%d\n" k_eob; *) - (* Move to the beginning of the next line: *) - let k_eol = search_end_of_line k_eob in - (* Printf.printf "k_eol=%d\n" k_eol; *) - Netstream.move s k_eol; - (* Begin with first part: *) - parse_part() - with - Not_found -> - (* No boundary at all: The body is empty. *) - () - end; -;; - - -(* ====================================================================== - * History: - * - * $Log$ - * Revision 1.1 2000/11/17 09:57:27 lpadovan - * Initial revision - * - * Revision 1.8 2000/08/13 00:04:36 gerd - * Encoded_word -> EncodedWord - * Bugfixes. - * - * Revision 1.7 2000/08/07 00:25:14 gerd - * Implemented the new functions for structured field lexing. - * - * Revision 1.6 2000/06/25 22:34:43 gerd - * Added labels to arguments. - * - * Revision 1.5 2000/06/25 21:15:48 gerd - * Checked thread-safety. - * - * Revision 1.4 2000/05/16 22:30:14 gerd - * Added support for some types of malformed MIME messages. - * - * Revision 1.3 2000/04/15 13:09:01 gerd - * Implemented uploads to temporary files. - * - * Revision 1.2 2000/03/02 01:15:30 gerd - * Updated. - * - * Revision 1.1 2000/02/25 15:21:12 gerd - * Initial revision. - * - * - *)