X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fmimestring.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fmimestring.ml;h=8fc4bfcbe15ee2cb21189d895af063ac84dd5690;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git

diff --git a/helm/DEVEL/pxp/netstring/mimestring.ml b/helm/DEVEL/pxp/netstring/mimestring.ml
new file mode 100644
index 000000000..8fc4bfcbe
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/mimestring.ml
@@ -0,0 +1,1035 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Str = Netstring_str;;
+
+let cr_or_lf_re = Str.regexp "[\013\n]";;
+
+let trim_right_spaces_re =
+  Str.regexp "[ \t]+$";;
+
+let trim_left_spaces_re =
+  Str.regexp "^[ \t]+";;
+
+let header_re =
+  Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
+
+let empty_line_re =
+  Str.regexp "\013?\n";;
+
+let end_of_header_re =
+  Str.regexp "\n\013?\n";;
+
+
+let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
+  let rec parse_header i l =
+    match Str.string_partial_match header_re parstr i with
+	Some r ->
+	  let i' = Str.match_end r in
+	  if i' > i1 then
+	    failwith "Mimestring.scan_header";
+	  let name = String.lowercase(Str.matched_group r 1 parstr) in
+	  let value_with_crlf =
+	    Str.matched_group r 2 parstr in
+	  let value =
+	    if unfold then begin
+	      let value_with_rspaces =
+		Str.global_replace cr_or_lf_re "" value_with_crlf in
+	      let value_with_lspaces =
+		Str.global_replace trim_right_spaces_re "" value_with_rspaces in
+	      Str.global_replace trim_left_spaces_re "" value_with_lspaces 
+	    end
+	    else value_with_crlf
+	  in
+	  parse_header i' ( (name,value) :: l)
+      | None ->
+	  (* The header must end with an empty line *)
+	  begin match Str.string_partial_match empty_line_re parstr i with
+	      Some r' ->
+		List.rev l, Str.match_end r'
+	    | None ->
+		failwith "Mimestring.scan_header"
+	  end
+  in
+  parse_header i0 []
+;;
+
+type s_token =
+    Atom of string
+  | EncodedWord of (string * string * string)
+  | QString of string
+  | Control of char
+  | Special of char
+  | DomainLiteral of string
+  | Comment
+  | End
+;;
+
+type s_option =
+    No_backslash_escaping
+  | Return_comments
+  | Recognize_encoded_words
+;;
+
+type s_extended_token =
+    { token      : s_token;
+      token_pos  : int;
+      token_line : int;
+      token_linepos : int;   (* Position of the beginning of the line *)
+      token_len  : int;
+      mutable token_sep : bool; (* separates adjacent encoded words *)
+    }
+;;
+
+let get_token et  = et.token;;
+let get_pos et    = et.token_pos;;
+let get_line et   = et.token_line;;
+let get_column et = et.token_pos - et.token_linepos;;
+let get_length et = et.token_len;;
+let separates_adjacent_encoded_words et = et.token_sep;;
+
+let get_decoded_word et =
+  match et.token with
+      Atom s -> s
+    | QString s -> s
+    | Control c -> String.make 1 c
+    | Special c -> String.make 1 c
+    | DomainLiteral s -> s
+    | Comment -> ""
+    | EncodedWord (_, encoding, content) ->
+	( match encoding with
+	      ("Q"|"q") ->
+		Netencoding.Q.decode content
+	    | ("B"|"b") -> 
+		Netencoding.Base64.decode 
+		  ~url_variant:false
+		  ~accept_spaces:false
+		  content
+	    | _ -> failwith "get_decoded_word"
+	)
+    | End -> 
+	failwith "get_decoded_word"
+;;
+
+let get_charset et =
+  match et.token with
+      EncodedWord (charset, _, _) -> charset
+    | End -> failwith "get_charset"
+    | _ -> "US-ASCII"
+;;
+
+type scanner_spec =
+    { (* What the user specifies: *)
+      scanner_specials : char list;
+      scanner_options : s_option list;
+      (* Derived from that: *)
+      mutable opt_no_backslash_escaping : bool;
+      mutable opt_return_comments : bool;
+      mutable opt_recognize_encoded_words : bool;
+
+      mutable is_special : bool array;
+      mutable space_is_special : bool;
+    }
+;;
+
+type scanner_target =
+    { scanned_string : string;
+      mutable scanner_pos : int;
+      mutable scanner_line : int;
+      mutable scanner_linepos : int; 
+      (* Position of the beginning of the line *)
+      mutable scanned_tokens : s_extended_token Queue.t;
+      (* A queue of already scanned tokens in order to look ahead *)
+      mutable last_token : s_token;
+      (* The last returned token. It is only important whether it is
+       * EncodedWord or not.
+       *)
+    }
+;;
+
+type mime_scanner = scanner_spec * scanner_target
+;;
+
+let get_pos_of_scanner (spec, target) = 
+  if spec.opt_recognize_encoded_words then
+    failwith "get_pos_of_scanner"
+  else
+    target.scanner_pos
+;;
+
+let get_line_of_scanner (spec, target) = 
+  if spec.opt_recognize_encoded_words then
+    failwith "get_line_of_scanner"
+  else
+    target.scanner_line
+;;
+
+let get_column_of_scanner (spec, target) = 
+  if spec.opt_recognize_encoded_words then
+    failwith "get_column_of_scanner"
+  else
+    target.scanner_pos - target.scanner_linepos 
+;;
+
+let create_mime_scanner ~specials ~scan_options =
+  let is_spcl = Array.create 256 false in
+  List.iter
+    (fun c -> is_spcl.( Char.code c ) <- true)
+    specials;
+  let spec =
+    { scanner_specials = specials;
+      scanner_options = scan_options;
+      opt_no_backslash_escaping = 
+	List.mem No_backslash_escaping scan_options;
+      opt_return_comments = 
+	List.mem Return_comments scan_options;
+      opt_recognize_encoded_words = 
+	List.mem Recognize_encoded_words scan_options;
+      is_special = is_spcl;
+      space_is_special = is_spcl.(32);
+    }
+  in
+  (* Grab the remaining arguments: *)
+  fun ?(pos=0) ?(line=1) ?(column=0) s ->
+    let target =
+      { scanned_string = s;
+	scanner_pos = pos;
+	scanner_line = line;
+	scanner_linepos = pos - column;
+	scanned_tokens = Queue.create();
+	last_token = Comment;   (* Must not be initialized with EncodedWord *)
+      }
+    in
+    spec, target
+;;
+
+
+let encoded_word_re =
+  Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
+
+let scan_next_token ((spec,target) as scn) =
+  let mk_pair t len =
+    { token = t;
+      token_pos = target.scanner_pos;
+      token_line = target.scanner_line;
+      token_linepos = target.scanner_linepos;
+      token_len = len;
+      token_sep = false;
+    },
+    t
+  in
+
+  (* Note: mk_pair creates a new token pair, and it assumes that 
+   * target.scanner_pos (and also scanner_line and scanner_linepos)
+   * still contain the position of the beginning of the token.
+   *)
+
+  let s = target.scanned_string in
+  let l = String.length s in
+  let rec scan i =
+    if i < l then begin
+      let c = s.[i] in
+      if spec.is_special.( Char.code c ) then begin
+	let pair = mk_pair (Special c) 1 in
+	target.scanner_pos <- target.scanner_pos + 1;
+	(match c with
+	     '\n' -> 
+	       target.scanner_line    <- target.scanner_line + 1;
+	       target.scanner_linepos <- target.scanner_pos;
+	   | _ -> ()
+	);
+	pair
+      end
+      else
+	match c with
+	    '"' -> 
+	      (* Quoted string: *)
+	      scan_qstring (i+1) (i+1) 0
+	  | '(' ->
+	      (* Comment: *)
+	      let i', line, linepos = 
+		scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
+	      in
+	      let advance() =
+		target.scanner_pos <- i';
+		target.scanner_line <- line;
+		target.scanner_linepos <- linepos
+	      in
+	      if spec.opt_return_comments then begin
+		let pair = mk_pair Comment (i' - i) in
+		advance();
+		pair
+	      end
+	      else 
+		if spec.space_is_special then begin
+		  let pair = mk_pair (Special ' ') (i' - i) in
+		  advance();
+		  pair
+		end
+		else begin
+		  advance();
+		  scan i'
+		end
+	  | (' '|'\t'|'\r') ->
+	      (* Ignore whitespace by default: *)
+	      target.scanner_pos <- target.scanner_pos + 1;
+	      scan (i+1)
+	  | '\n' ->
+	      (* Ignore whitespace by default: *)
+	      target.scanner_pos     <- target.scanner_pos + 1;
+	      target.scanner_line    <- target.scanner_line + 1;
+	      target.scanner_linepos <- target.scanner_pos;
+	      scan (i+1)
+	  | ('\000'..'\031'|'\127'..'\255') ->
+	      let pair = mk_pair (Control c) 1 in
+	      target.scanner_pos <- target.scanner_pos + 1;
+	      pair
+	  | '[' ->
+	      (* Domain literal: *)
+	      scan_dliteral (i+1) (i+1) 0
+	  | _ ->
+	      scan_atom i i
+    end
+    else 
+      mk_pair End 0
+
+  and scan_atom i0 i =
+    let return_atom() =
+      let astring = String.sub s i0 (i-i0) in
+      let r =
+	if spec.opt_recognize_encoded_words then
+	  Str.string_match ~groups:4 encoded_word_re astring 0
+	else
+	  None
+      in
+      match r with
+	  None ->
+	    (* An atom contains never a linefeed character, so we can ignore
+	     * scanner_line here.
+	     *)
+	    let pair = mk_pair (Atom astring) (i-i0) in
+	    target.scanner_pos <- i;
+	    pair
+	| Some mr ->
+	    (* Found an encoded word. *)
+	    let charset  = Str.matched_group mr 1 astring in
+	    let encoding = Str.matched_group mr 2 astring in
+	    let content  = Str.matched_group mr 3 astring in
+	    let t = EncodedWord(String.uppercase charset,
+				 String.uppercase encoding,
+				 content) in
+	    let pair = mk_pair t (i-i0) in
+	    target.scanner_pos <- i;
+	    pair
+    in
+
+    if i < l then
+      let c = s.[i] in
+      match c with
+	  ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
+	    return_atom()
+	| _ ->
+	    if spec.is_special.( Char.code c ) then
+	      return_atom()
+	    else
+	      scan_atom i0 (i+1)
+    else
+      return_atom()
+
+  and scan_qstring i0 i n =
+    if i < l then
+      let c = s.[i] in
+      match c with
+	  '"' ->
+	    (* Regular end of the quoted string: *)
+	    let content, line, linepos = copy_qstring i0 (i-1) n in
+	    let pair = mk_pair (QString content) (i-i0+2) in
+	    target.scanner_pos <- i+1;
+	    target.scanner_line <- line;
+	    target.scanner_linepos <- linepos;
+	    pair
+	| '\\' when not spec.opt_no_backslash_escaping ->
+	    scan_qstring i0 (i+2) (n+1)
+	| _ ->
+	    scan_qstring i0 (i+1) (n+1)
+    else
+      (* Missing right double quote *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (QString content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+  and copy_qstring i0 i1 n =
+    (* Used for quoted strings and for domain literals *)
+    let r = String.create n in
+    let k = ref 0 in
+    let line = ref target.scanner_line in
+    let linepos = ref target.scanner_linepos in
+    for i = i0 to i1 do
+      let c = s.[i] in
+      match c with
+	  '\\' when i < i1 &&  not spec.opt_no_backslash_escaping -> ()
+	| '\n' ->
+	    line := !line + 1;
+	    linepos := i+1;
+	    r.[ !k ] <- c; 
+	    incr k
+	| _ -> 
+	    r.[ !k ] <- c; 
+	    incr k
+    done;
+    assert (!k = n);
+    r, !line, !linepos
+
+  and scan_dliteral i0 i n =
+    if i < l then
+      let c = s.[i] in
+      match c with
+	  ']' ->
+	    (* Regular end of the domain literal: *)
+	    let content, line, linepos = copy_qstring i0 (i-1) n in
+	    let pair = mk_pair (DomainLiteral content) (i-i0+2) in
+	    target.scanner_pos <- i+1;
+	    target.scanner_line <- line;
+	    target.scanner_linepos <- linepos;
+	    pair
+	| '\\' when not spec.opt_no_backslash_escaping ->
+	    scan_dliteral i0 (i+2) (n+1)
+	| _ ->
+	    (* Note: '[' is not allowed by RFC 822; we treat it here as
+	     * a regular character (questionable)
+	     *)
+	    scan_dliteral i0 (i+1) (n+1)
+    else
+      (* Missing right bracket *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (DomainLiteral content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+
+  and scan_comment i level line linepos =
+    if i < l then
+      let c = s.[i] in
+      match c with
+	  ')' ->
+	    (i+1), line, linepos
+	| '(' ->
+	    (* nested comment *)
+	    let i', line', linepos' = 
+	      scan_comment (i+1) (level+1) line linepos 
+	    in
+	    scan_comment i' level line' linepos'
+	| '\\' when not spec.opt_no_backslash_escaping ->
+	    if (i+1) < l && s.[i+1] = '\n' then
+	      scan_comment (i+2) level (line+1) (i+2)
+	    else
+	      scan_comment (i+2) level line linepos
+	| '\n' ->
+	    scan_comment (i+1) level (line+1) (i+1)
+	| _ ->
+	    scan_comment (i+1) level line linepos
+    else
+      (* Missing closing ')' *)
+      i, line, linepos
+  in
+
+  scan target.scanner_pos
+;;
+
+
+let scan_token ((spec,target) as scn) =
+  (* This function handles token queueing in order to recognize white space
+   * that separates adjacent encoded words.
+   *)
+
+  let rec collect_whitespace () =
+    (* Scans whitespace tokens and returns them as:
+     * (ws_list, other_tok)     if there is some other_tok following the
+     *                          list (other_tok = End is possible)
+     *)
+    let (et, t) as pair = scan_next_token scn in
+    ( match t with
+	  (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
+	    let ws_list, tok = collect_whitespace() in
+	    pair :: ws_list, tok
+	| _ ->
+	    [], pair
+    )
+  in
+
+  try
+    (* Is there an already scanned token in the queue? *)
+    let et = Queue.take target.scanned_tokens in
+    let t = et.token in
+    target.last_token <- t;
+    et, et.token
+  with
+      Queue.Empty ->
+	(* If not: inspect the last token. If that token is an EncodedWord,
+	 * the next tokens are scanned in advance to determine if there
+	 * are spaces separating two EncodedWords. These tokens are put
+	 * into the queue such that it is avoided that they are scanned
+	 * twice. (The sole purpose of the queue.)
+	 *)
+	match target.last_token with
+	    EncodedWord(_,_,_) as ew ->
+	      let ws_list, tok = collect_whitespace() in
+	      (* If tok is an EncodedWord, too, the tokens in ws_list must
+	       * be flagged as separating two adjacent encoded words. 
+	       *)
+	      ( match tok with
+		    _, EncodedWord(_,_,_) ->
+		      List.iter
+			(fun (et,t) ->
+			   et.token_sep <- true)
+			ws_list
+		  | _ ->
+		      ()
+	      );
+	      (* Anyway, queue the read tokens but the first up *)
+	      ( match ws_list with
+		    [] ->
+		      (* Nothing to queue *)
+		      let et, t = tok in
+		      target.last_token <- t;
+		      tok
+		  | (et,t) as pair :: ws_list' ->
+		      List.iter
+			(fun (et',_) -> 
+			   Queue.add et' target.scanned_tokens)
+			ws_list';
+		      ( match tok with
+			  | _, End ->
+			      ()
+			  | (et',_) ->
+			      Queue.add et' target.scanned_tokens
+		      );
+		      (* Return the first scanned token *)
+		      target.last_token <- t;
+		      pair
+	      )
+	  | _ ->
+	      (* Regular case: Scan one token; do not queue it up *)
+	      let (et, t) as pair = scan_next_token scn in 
+	      target.last_token <- t;
+	      pair
+;;
+	
+
+let scan_token_list scn =
+  let rec collect() =
+    match scan_token scn with
+	_, End ->
+	  []
+      | pair ->
+	  pair :: collect()
+  in
+  collect()
+;;
+
+
+let scan_structured_value s specials options =
+  let rec collect scn =
+    match scan_token scn with
+	_, End ->
+	  []
+      | _, t ->
+	  t :: collect scn
+  in
+  let scn = create_mime_scanner specials options s in
+  collect scn
+;;
+
+
+let specials_rfc822 =
+  [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
+
+
+let specials_rfc2045 =
+  [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
+
+
+let scan_encoded_text_value s =
+  let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
+  let options =  [ Recognize_encoded_words ] in
+  let scn = create_mime_scanner specials options s in
+  
+  let rec collect () =
+    match scan_token scn with
+	_, End ->
+	  []
+      | et, _ when separates_adjacent_encoded_words et ->
+	  collect()
+      | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
+	  et :: collect ()
+      | _, _ ->
+	  assert false
+  in
+  collect()
+;;
+
+
+let scan_value_with_parameters s options =
+  let rec parse_params tl =
+    match tl with
+	Atom n :: Special '=' :: Atom v :: tl' ->
+	  (n,v) :: parse_rest tl'
+      | Atom n :: Special '=' :: QString v :: tl' ->
+	  (n,v) :: parse_rest tl'
+      | _ ->
+	  failwith "Mimestring.scan_value_with_parameters"
+  and parse_rest tl =
+    match tl with
+	[] -> []
+      | Special ';' :: tl' ->
+	  parse_params tl'
+      | _ ->
+	  failwith "Mimestring.scan_value_with_parameters"
+  in
+
+  (* Note: Even if not used here, the comma is a very common separator
+   * and should be recognized as being special. You will get a
+   * failure if there is a comma in the scanned string.
+   *)
+  let tl = scan_structured_value s [ ';'; '='; ',' ] options in
+  match tl with
+      [ Atom n ] -> n, []
+    | [ QString n ] -> n, []
+    | Atom n :: Special ';' :: tl' ->
+	n, parse_params tl'
+    | QString n :: Special ';' :: tl' ->
+	n, parse_params tl'
+    | _ ->
+	failwith "Mimestring.scan_value_with_parameters"
+;;
+
+
+let scan_mime_type s options =
+  let n, params = scan_value_with_parameters s options in
+  (String.lowercase n),
+  (List.map (fun (n,v) -> (String.lowercase n, v)) params)
+;;
+
+
+let lf_re = Str.regexp "[\n]";;
+
+let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
+  let l_s = String.length s in
+  if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then
+    invalid_arg "Mimestring.scan_multipart_body";
+
+  (* First compile the regexps scanning for 'boundary': *)
+  let boundary1_re =
+    Str.regexp ("\n--" ^ Str.quote boundary) in
+  let boundary2_re =
+    Str.regexp ("--" ^ Str.quote boundary) in
+
+  let rec parse i =
+    (* i: Beginning of the current part (position directly after the
+     * boundary line
+     *)
+    (* Search for next boundary at position i *)
+    let i' =
+      try min (fst (Str.search_forward boundary1_re s i) + 1) i1
+      with
+	  Not_found -> i1
+    in
+    (* i': Either the position of the first '-' of the boundary line,
+     *     or i1 if no boundary has been found
+     *)
+    if i' >= i1 then
+      [] (* Ignore everything after the last boundary *)
+    else
+      let i'' =
+	try min (fst (Str.search_forward lf_re s i') + 1) i1
+	with
+	    Not_found -> i1
+      in
+      (* i'': The position after the boundary line *)
+(*
+      print_int i; print_newline();
+      print_int i'; print_newline();
+      print_int i''; print_newline();
+      flush stdout;
+*)
+      let header, k = scan_header s i i' in
+      (* header: the header of the part
+       * k: beginning of the body
+       *)
+
+      let value =
+	(* We know that i'-1 is a linefeed character. i'-2 should be a CR
+	 * character. Both characters are not part of the value.
+	 *)
+	if i' >= 2 then
+	  match s.[i'-2] with
+	      '\013' -> String.sub s k (i'-2-k)
+	    | _      -> String.sub s k (i'-1-k)
+	else
+	  String.sub s k (i'-1-k)
+      in
+
+      let pair =
+	(header, value) in
+
+      if i'' >= i1
+      then
+	[ pair ]
+      else
+      	pair :: parse i''
+  in
+
+  (* Find the first boundary. This is a special case, because it may be
+   * right at the beginning of the string (no preceding CRLF)
+   *)
+
+  let i_bnd =
+    if Str.string_partial_match boundary2_re s i0 <> None then
+      i0
+    else
+      try min (fst (Str.search_forward boundary1_re s i0)) i1
+      with
+	  Not_found -> i1
+  in
+
+  if i_bnd >= i1 then
+    []
+  else
+    let i_bnd' =
+      try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
+      with
+	  Not_found -> i1
+    in
+    if i_bnd' >= i1 then
+      []
+    else
+      parse i_bnd'
+;;
+
+
+let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
+  let parts = scan_multipart_body s i0 i1 boundary in
+  List.map
+    (fun (params, value) ->
+       let encoding =
+	 try List.assoc "content-transfer-encoding" params
+	 with Not_found -> "7bit"
+       in
+
+       (* NOTE: In the case of "base64" and "quoted-printable", the allocation
+	* of the string "value" could be avoided.
+	*)
+
+       let value' =
+	 match encoding with
+	     ("7bit"|"8bit"|"binary") -> value
+	   | "base64" ->
+	       Netencoding.Base64.decode_substring
+		 value 0 (String.length value) false true
+	   | "quoted-printable" ->
+	       Netencoding.QuotedPrintable.decode_substring
+		 value 0 (String.length value)
+	   | _ ->
+	       failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
+       in
+       (params, value')
+    )
+    parts
+;;
+
+
+let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
+
+  (* The block size of s must be at least the length of the boundary + 3.
+   * Otherwise it is not guaranteed that the boundary is always recognized.
+   *)
+  if Netstream.block_size s < String.length boundary + 3 then
+    invalid_arg "Mimestring.scan_multipart_body_from_netstream";
+
+  (* First compile the regexps scanning for 'boundary': *)
+  let boundary1_re =
+    Str.regexp ("\n--" ^ Str.quote boundary) in
+  let boundary2_re =
+    Str.regexp ("--" ^ Str.quote boundary) in
+
+  (* Subtask 1: Search the end of the MIME header: CR LF CR LF
+   *            (or LF LF). Enlarge the window until the complete header
+   *            is covered by the window.
+   *)
+  let rec search_end_of_header k =
+    (* Search the end of the header beginning at position k of the
+     * current window.
+     * Return the position of the first character of the body.
+     *)
+    try
+      (* Search for LF CR? LF: *)
+      let i, r = Str.search_forward
+		   end_of_header_re
+		   (Netbuffer.unsafe_buffer (Netstream.window s))
+		   k
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then
+	Str.match_end r
+      else
+	raise Not_found
+    with
+	Not_found ->
+	  (* If the end of the stream is reached, the end of the header
+	   * is missing: Error.
+	   * Otherwise, we try to read another block, and continue.
+	   *)
+	  if Netstream.at_eos s then
+	    failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
+	  let w0 = Netstream.window_length s in
+	  Netstream.want_another_block s;
+	  search_end_of_header (max (w0 - 2) 0)
+  in
+
+  (* Subtask 2: Search the first boundary line. *)
+  let rec search_first_boundary() =
+    (* Search boundary per regexp; return the position of the character
+     * immediately following the boundary (on the same line), or
+     * raise Not_found.
+     *)
+    try
+      (* Search boundary per regexp: *)
+      let i, r = Str.search_forward
+		   boundary1_re
+		   (Netbuffer.unsafe_buffer (Netstream.window s))
+		   0
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then begin
+	Str.match_end r
+      end
+      else raise Not_found
+    with
+	Not_found ->
+	  if Netstream.at_eos s then raise Not_found;
+	  (* The regexp did not match: Move the window by one block.
+	   *)
+	  let n =
+	    min
+	      (Netstream.window_length s)
+	      (Netstream.block_size s)
+	  in
+	  Netstream.move s n;
+	  search_first_boundary()
+  in
+
+  (* Subtask 3: Search the next boundary line. Invoke 'add' for every
+   * read chunk
+   *)
+  let rec search_next_boundary p =
+    (* Returns the position directly after the boundary on the same line *)
+    try
+      (* Search boundary per regexp: *)
+      let i,r = Str.search_forward
+		  boundary1_re
+		  (Netbuffer.unsafe_buffer (Netstream.window s))
+		  0
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then begin
+	(* Add the last chunk of the part. *)
+	let n =
+	  (* i is a LF. i - 1 should be CR. Ignore these characters. *)
+	  if i >= 1 then
+	    match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
+		'\013' -> i - 1
+	      | _      -> i
+	  else
+	    i
+	in
+	(* Printf.printf "add n=%d\n" n; *)
+	add p s 0 n;
+	Str.match_end r
+      end
+      else raise Not_found
+    with
+	Not_found ->
+	  if Netstream.at_eos s then
+	    failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
+	  (* The regexp did not match: Add the first block of the window;
+	   * and move the window.
+	   *)
+	  let n =
+	    min
+	      (Netstream.window_length s)
+	      (Netstream.block_size s)
+	  in
+	  (* Printf.printf "add n=%d\n" n; *)
+	  add p s 0 n;
+	  Netstream.move s n;
+	  search_next_boundary p
+  in
+
+  (* Subtask 4: Search the end of the boundary line *)
+  let rec search_end_of_line k =
+    (* Search LF beginning at position k. Discard any contents until that. *)
+    try
+      let i,r = Str.search_forward
+		  lf_re
+		  (Netbuffer.unsafe_buffer (Netstream.window s))
+		  k
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then begin
+	 Str.match_end r
+      end
+      else raise Not_found
+    with
+	Not_found ->
+	  if Netstream.at_eos s then
+	    failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
+	  (* The regexp did not match: move the window.
+	   *)
+	  let n = Netstream.window_length s in
+	  Netstream.move s n;
+	  search_end_of_line 0
+  in
+
+  (* Subtask 5: Check whether "--" follows the boundary on the same line *)
+  let check_whether_last_boundary k =
+    (* k: The position directly after the boundary. *)
+    Netstream.want s (k+2);
+    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+    (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
+  in
+
+  (* Subtask 6: Check whether the buffer begins with a boundary. *)
+  let check_beginning_is_boundary () =
+    let m = String.length boundary + 2 in
+    Netstream.want s m;
+    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+    (Netstream.window_length s >= m) &&
+    (Str.string_partial_match boundary2_re str 0 <> None)
+  in
+
+  let rec parse_part () =
+    (* The first byte of the current window of s contains the character
+     * directly following the boundary line that starts this part.
+     *)
+    (* Search the end of the MIME header: *)
+    let k_eoh = search_end_of_header 0 in
+    (* Printf.printf "k_eoh=%d\n" k_eoh; *)
+    (* Get the MIME header: *)
+    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+    let header, k_eoh' = scan_header str 0 k_eoh in
+    assert (k_eoh = k_eoh');
+    (* Move the window over the header: *)
+    Netstream.move s k_eoh;
+    (* Create the part: *)
+    let p = create header in
+    let continue =
+      begin try
+	(* Search the next boundary; add the chunks while searching: *)
+	let k_eob = search_next_boundary p in
+	(* Printf.printf "k_eob=%d\n" k_eob; *)
+        (* Is this the last boundary? *)
+	if check_whether_last_boundary k_eob then begin
+	  (* Skip the rest: *)
+	  while not (Netstream.at_eos s) do
+	    Netstream.move s (Netstream.window_length s)
+	  done;
+	  Netstream.move s (Netstream.window_length s);
+	  false
+	end
+	else begin
+	  (* Move to the beginning of the next line: *)
+	  let k_eol = search_end_of_line k_eob in
+	  Netstream.move s k_eol;
+	  true
+	end
+      with
+	  any ->
+	    (try stop p with _ -> ());
+	    raise any
+      end in
+      stop p;
+      if continue then
+	(* Continue with next part: *)
+	parse_part()
+  in
+
+  (* Check whether s directly begins with a boundary: *)
+  if check_beginning_is_boundary() then begin
+    (* Move to the beginning of the next line: *)
+    let k_eol = search_end_of_line 0 in
+    Netstream.move s k_eol;
+    (* Begin with first part: *)
+    parse_part()
+  end
+  else begin
+    (* Search the first boundary: *)
+    try
+      let k_eob = search_first_boundary() in
+      (* Printf.printf "k_eob=%d\n" k_eob; *)
+      (* Move to the beginning of the next line: *)
+      let k_eol = search_end_of_line k_eob in
+      (* Printf.printf "k_eol=%d\n" k_eol; *)
+      Netstream.move s k_eol;
+      (* Begin with first part: *)
+      parse_part()
+    with
+	Not_found ->
+	  (* No boundary at all: The body is empty. *)
+	  ()
+  end;
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/13 00:04:36  gerd
+ * 	Encoded_word -> EncodedWord
+ * 	Bugfixes.
+ *
+ * Revision 1.7  2000/08/07 00:25:14  gerd
+ * 	Implemented the new functions for structured field lexing.
+ *
+ * Revision 1.6  2000/06/25 22:34:43  gerd
+ * 	Added labels to arguments.
+ *
+ * Revision 1.5  2000/06/25 21:15:48  gerd
+ * 	Checked thread-safety.
+ *
+ * Revision 1.4  2000/05/16 22:30:14  gerd
+ * 	Added support for some types of malformed MIME messages.
+ *
+ * Revision 1.3  2000/04/15 13:09:01  gerd
+ * 	Implemented uploads to temporary files.
+ *
+ * Revision 1.2  2000/03/02 01:15:30  gerd
+ * 	Updated.
+ *
+ * Revision 1.1  2000/02/25 15:21:12  gerd
+ * 	Initial revision.
+ *
+ *
+ *)