(* $Id$ * ---------------------------------------------------------------------- * *) exception Resources_exceeded type argument_processing = Memory | File | Automatic;; type argument = { mutable arg_name : string; mutable arg_processing : argument_processing; mutable arg_buf_value : Buffer.t; mutable arg_mem_value : string option; (* Here, the value is stored if it must be kept in memory *) mutable arg_disk_value : string Weak.t; (* This component is used iff arg_mem_value = None. The * weak array has a length of 1, and the single element stores * the value (if any). *) mutable arg_file : string option; (* The filename of the temporary file storing the value *) mutable arg_fd : out_channel option; (* The file descriptor of the temp file (if open) *) mutable arg_mimetype : string; mutable arg_filename : string option; mutable arg_header : (string * string) list; (* For the last three components, see the description of the * corresponding functions in the mli file. *) } ;; type workaround = Work_around_MSIE_Content_type_bug | Work_around_backslash_bug ;; type config = { maximum_content_length : int; how_to_process_arguments : argument -> argument_processing; tmp_directory : string; tmp_prefix : string; workarounds : workaround list; } ;; let print_argument arg = Format.printf "" arg.arg_name (match arg.arg_filename with None -> "*" | Some n -> n) arg.arg_mimetype (match arg.arg_file with None -> "Memory" | Some n -> n) ;; let encode = Netencoding.Url.encode ;; let decode = Netencoding.Url.decode ;; let url_split_re = Str.regexp "[&=]";; let mk_url_encoded_parameters nv_pairs = String.concat "&" (List.map (fun (name,value) -> let name_encoded = Netencoding.Url.encode name in let value_encoded = Netencoding.Url.encode value in name_encoded ^ "=" ^ value_encoded ) nv_pairs ) ;; let dest_url_encoded_parameters parstr = let rec parse_after_amp tl = match tl with Str.Text name :: Str.Delim "=" :: Str.Text value :: tl' -> (Netencoding.Url.decode name, Netencoding.Url.decode value) :: parse_next tl' | Str.Text name :: Str.Delim "=" :: Str.Delim "&" :: tl' -> (Netencoding.Url.decode name, "") :: parse_after_amp tl' | Str.Text name :: Str.Delim "=" :: [] -> [Netencoding.Url.decode name, ""] | _ -> failwith "Cgi.dest_url_encoded_parameters" and parse_next tl = match tl with [] -> [] | Str.Delim "&" :: tl' -> parse_after_amp tl' | _ -> failwith "Cgi.dest_url_encoded_parameters" in let toklist = Str.full_split url_split_re parstr in match toklist with [] -> [] | _ -> parse_after_amp toklist ;; let mk_form_encoded_parameters ntv_triples = failwith "Cgi.mk_form_encoded_parameters: not implemented";; let dest_parameter_header header options = let get_name s = (* s is: form-data; ... name="fieldname" ... * Extract "fieldname" *) try let tok, params = Mimestring.scan_value_with_parameters s options in List.assoc "name" params with Not_found -> failwith "Cgi.dest_form_encoded_parameters" | Failure "Mimestring.scan_value_with_parameters" -> failwith "Cgi.dest_form_encoded_parameters" in let get_filename s = (* s is: form-data; ... filename="fieldname" ... * Extract "fieldname" *) try let tok, params = Mimestring.scan_value_with_parameters s options in Some(List.assoc "filename" params) with Not_found -> None | Failure "Mimestring.scan_value_with_parameters" -> failwith "Cgi.dest_form_encoded_parameters" in let mime_type = try List.assoc "content-type" header with Not_found -> "text/plain" in (* the default *) let content_disposition = try List.assoc "content-disposition" header with Not_found -> failwith "Cgi.dest_form_encoded_parameters: no content-disposition" in let name = get_name content_disposition in let filename = get_filename content_disposition in name, mime_type, filename ;; let dest_form_encoded_parameters parstr ~boundary config = let options = if List.mem Work_around_backslash_bug config.workarounds then [ Mimestring.No_backslash_escaping ] else [] in let parts = Mimestring.scan_multipart_body_and_decode parstr 0 (String.length parstr) boundary in List.map (fun (params, value) -> let name, mime_type, filename = dest_parameter_header params options in { arg_name = name; arg_processing = Memory; arg_buf_value = Buffer.create 1; arg_mem_value = Some value; arg_disk_value = Weak.create 1; arg_file = None; arg_fd = None; arg_mimetype = mime_type; arg_filename = filename; arg_header = params; } ) parts ;; let make_temporary_file config = (* Returns (filename, out_channel). *) let rec try_creation n = try let fn = Filename.concat config.tmp_directory (config.tmp_prefix ^ "-" ^ (string_of_int n)) in let fd = open_out_gen [ Open_wronly; Open_creat; Open_excl; Open_binary ] 0o666 fn in fn, fd with Sys_error m -> (* This does not look very intelligent, but it is the only chance * to limit the number of trials. *) if n > 1000 then failwith ("Cgi: Cannot create temporary file: " ^ m); try_creation (n+1) in try_creation 0 ;; let dest_form_encoded_parameters_from_netstream s ~boundary config = let parts = ref [] in let options = if List.mem Work_around_backslash_bug config.workarounds then [ Mimestring.No_backslash_escaping ] else [] in let create header = (* CALLBACK for scan_multipart_body_from_netstream *) let name, mime_type, filename = dest_parameter_header header options in let p0 = { arg_name = name; arg_processing = Memory; arg_buf_value = Buffer.create 80; arg_mem_value = None; arg_disk_value = Weak.create 1; arg_file = None; arg_fd = None; arg_mimetype = mime_type; arg_filename = filename; arg_header = header; } in let pr = config.how_to_process_arguments p0 in let p = { p0 with arg_processing = pr } in if pr = File then begin let fn, fd = make_temporary_file config in p.arg_file <- Some fn; p.arg_fd <- Some fd; p.arg_mem_value <- None; end; p in let add p s k n = (* CALLBACK for scan_multipart_body_from_netstream *) if (p.arg_processing = Automatic) && (Buffer.length (p.arg_buf_value) >= Netstream.block_size s) then begin (* This is a LARGE argument *) p.arg_processing <- File; let fn, fd = make_temporary_file config in p.arg_file <- Some fn; p.arg_fd <- Some fd; p.arg_mem_value <- None; output_string fd (Buffer.contents p.arg_buf_value); p.arg_buf_value <- Buffer.create 1; end; match p.arg_processing with (Memory|Automatic) -> Buffer.add_substring p.arg_buf_value (Netbuffer.unsafe_buffer (Netstream.window s)) k n | File -> let fd = match p.arg_fd with Some fd -> fd | None -> assert false in output fd (Netbuffer.unsafe_buffer (Netstream.window s)) k n; in let stop p = (* CALLBACK for scan_multipart_body_from_netstream *) begin match p.arg_processing with (Memory|Automatic) -> p.arg_mem_value <- Some (Buffer.contents p.arg_buf_value); p.arg_buf_value <- Buffer.create 1; | File -> let fd = match p.arg_fd with Some fd -> fd | None -> assert false in close_out fd; p.arg_mem_value <- None end; parts := p :: !parts in Mimestring.scan_multipart_body_from_netstream s boundary create add stop; List.rev !parts ;; let getenv name = try Sys.getenv name with Not_found -> "";; (* getenv: * We use this getenv instead of Sys.getenv. The CGI specification does not * say anything about what should happen if a certain environment variable * is not set. * Some servers initialize the environment variable to the empty string if * it is not applicable, some servers do not set the variable at all. Because * of this, unset variables are always reported as empty variables. * * This is especially a problem with QUERY_STRING. *) let mk_simple_arg ~name v = { arg_name = name; arg_processing = Memory; arg_buf_value = Buffer.create 1; arg_mem_value = Some v; arg_disk_value = Weak.create 0; arg_file = None; arg_fd = None; arg_mimetype = "text/plain"; arg_filename = None; arg_header = []; } ;; let mk_memory_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v = { arg_name = name; arg_processing = Memory; arg_buf_value = Buffer.create 1; arg_mem_value = Some v; arg_disk_value = Weak.create 0; arg_file = None; arg_fd = None; arg_mimetype = mime; arg_filename = filename; arg_header = header; } ;; let mk_file_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v_filename = let v_abs_filename = if Filename.is_relative v_filename then Filename.concat (Sys.getcwd()) v_filename else v_filename in { arg_name = name; arg_processing = File; arg_buf_value = Buffer.create 1; arg_mem_value = None; arg_disk_value = Weak.create 0; arg_file = Some v_abs_filename; arg_fd = None; arg_mimetype = mime; arg_filename = filename; arg_header = header; } ;; let get_content_type config = (* Get the environment variable CONTENT_TYPE; if necessary apply * workarounds for browser bugs. *) let content_type = getenv "CONTENT_TYPE" in let user_agent = getenv "HTTP_USER_AGENT" in let eff_content_type = if Str.string_match (Str.regexp ".*MSIE") user_agent 0 && List.mem Work_around_MSIE_Content_type_bug config.workarounds then begin (* Microsoft Internet Explorer: When used with SSL connections, * this browser sometimes produces CONTENT_TYPEs like * "multipart/form-data; boundary=..., multipart/form-data; boundary=..." * Workaround: Throw away everything after ", ". *) if Str.string_match (Str.regexp "\\([^,]*boundary[^,]*\\), .*boundary") content_type 0 then Str.matched_group 1 content_type else content_type end else content_type in eff_content_type ;; let really_parse_args config = let make_simple_arg (n,v) = mk_simple_arg n v in match getenv "REQUEST_METHOD" with ("GET"|"HEAD") -> List.map make_simple_arg (dest_url_encoded_parameters(getenv "QUERY_STRING")) | "POST" -> let n = try int_of_string (getenv "CONTENT_LENGTH") with _ -> failwith "Cgi.parse_arguments" in if n > config.maximum_content_length then raise Resources_exceeded; begin let mime_type, params = Mimestring.scan_mime_type(get_content_type config) [] in match mime_type with "application/x-www-form-urlencoded" -> let buf = String.create n in really_input stdin buf 0 n; List.map make_simple_arg (dest_url_encoded_parameters buf) | "multipart/form-data" -> let boundary = try List.assoc "boundary" params with Not_found -> failwith "Cgi.parse_arguments" in (* -------------------------------------------------- DEBUG let f = open_out "/tmp/cgiout" in output_string f buf; close_out f; * -------------------------------------------------- *) dest_form_encoded_parameters_from_netstream (Netstream.create_from_channel stdin (Some n) 4096) boundary config | _ -> failwith ("Cgi.parse_arguments: unknown content-type " ^ mime_type) end | _ -> failwith "Cgi.parse_arguments: unknown method" let parsed = ref None;; (* protected by lock/unlock *) let lock = ref (fun () -> ());; let unlock = ref (fun () -> ());; let init_mt new_lock new_unlock = lock := new_lock; unlock := new_unlock ;; let protect f = !lock(); try let r = f() in !unlock(); r with x -> !unlock(); raise x ;; let parse_arguments config = protect (fun () -> match !parsed with Some _ -> () | None -> parsed := Some (List.map (fun arg -> arg.arg_name, arg) (really_parse_args config)) ) ;; let arguments () = protect (fun () -> match !parsed with Some plist -> plist | None -> failwith "Cgi.arguments" ) ;; let set_arguments arglist = protect (fun () -> parsed := Some (List.map (fun arg -> arg.arg_name, arg) arglist) ) ;; let default_config = { maximum_content_length = max_int; how_to_process_arguments = (fun _ -> Memory); tmp_directory = "/var/tmp"; tmp_prefix = "cgi-"; workarounds = [ Work_around_MSIE_Content_type_bug; Work_around_backslash_bug; ] } ;; let arg_value arg = match arg.arg_mem_value with None -> begin match Weak.get arg.arg_disk_value 0 with None -> begin match arg.arg_file with None -> failwith "Cgi.arg_value: no value present" | Some filename -> let fd = open_in_bin filename in try let len = in_channel_length fd in let s = String.create len in really_input fd s 0 len; Weak.set arg.arg_disk_value 0 (Some s); close_in fd; s with any -> close_in fd; raise any end | Some v -> v end | Some s -> s ;; let arg_name arg = arg.arg_name;; let arg_file arg = arg.arg_file;; let arg_mimetype arg = arg.arg_mimetype;; let arg_filename arg = arg.arg_filename;; let arg_header arg = arg.arg_header;; let cleanup () = protect (fun () -> match !parsed with None -> () | Some plist -> List.iter (fun (name, arg) -> match arg.arg_file with None -> () | Some filename -> (* We do not complain if the file does not exist anymore. *) if Sys.file_exists filename then Sys.remove filename; arg.arg_file <- None ) plist ) ;; let argument name = List.assoc name (arguments());; let argument_value name = arg_value (argument name);; module Operators = struct let ( !% ) = argument let ( !$ ) = argument_value end;; let parse_args() = parse_arguments default_config; List.map (fun (name, arg) -> name, arg_value arg) (arguments()) ;; let parse_args_with_mimetypes() = parse_arguments default_config; List.map (fun (name, arg) -> name, arg_mimetype arg, arg_value arg) (arguments()) ;; let header s = let t = match s with "" -> "text/html" | _ -> s in print_string ("Content-type: " ^ t ^ "\n\n"); flush stdout ;; let this_url() = "http://" ^ (getenv "SERVER_NAME") ^ (getenv "SCRIPT_NAME") ;; (* ====================================================================== * History: * * $Log$ * Revision 1.1 2000/11/17 09:57:27 lpadovan * Initial revision * * Revision 1.8 2000/06/25 22:34:43 gerd * Added labels to arguments. * * Revision 1.7 2000/06/25 21:40:36 gerd * Added printer. * * Revision 1.6 2000/06/25 21:15:48 gerd * Checked thread-safety. * * Revision 1.5 2000/05/16 22:29:36 gerd * Added support for two common file upload bugs. * * Revision 1.4 2000/04/15 16:47:27 gerd * Last minor changes before releasing 0.6. * * Revision 1.3 2000/04/15 13:09:01 gerd * Implemented uploads to temporary files. * * Revision 1.2 2000/03/02 01:15:30 gerd * Updated. * * Revision 1.1 2000/02/25 15:21:12 gerd * Initial revision. * * *)