(* $Id$
 * ----------------------------------------------------------------------
 *
 *)

(* FOR SIMPLE CGI PROGRAMS:
 *
 * If you do not need all the features of the API below, the following may
 * be enough:
 *
 * - At the beginning of the main program, call 'parse_arguments' with
 *   either 'default_config' as argument or with a customized configuration.
 * - Use 'argument_value(name)' to get the string value of the CGI parameter
 *   'name'. If you like, you can also open the Cgi.Operators module and
 *   write '!$ name' instead. Here, !$ is a prefix operator equivalent to
 *   argument_value.
 *
 * If you do not change the default configuration, you do not need to
 * worry about temporary files - there are not any.
 *
 * Most of the other functions defined below deal with file uploads, and
 * are only useful for that.
 *)


(**********************************************************************)
(* CGI functions                                                      *)
(**********************************************************************)

(* First, the general interface to the CGI argument parser. *)

exception Resources_exceeded
  (* Raised when a configured or system limit is exceeded: see
   * 'maximum_content_length' in type 'config' and the description of
   * 'arg_value' below.
   *)

type argument
  (* The opaque type of a CGI argument. Use the accessor functions
   * arg_name, arg_value, arg_file, arg_mimetype, arg_filename and
   * arg_header (declared below) to inspect a value of this type.
   *)

type argument_processing =
    Memory
      (* Keep the value of the argument in memory *)
  | File
      (* Store the value of the argument into a temporary file *)
  | Automatic
      (* Store only large arguments into files. An argument
       * value is large if it is longer than about one block (4K).
       * This is not an exact definition.
       *)

type workaround =
    Work_around_MSIE_Content_type_bug
      (* There is a bug in MSIE I observed together with SSL connections.
       * The CONTENT_TYPE passed to the server has sometimes the wrong
       * format. This option enables a workaround if the user agent string
       * contains the word "MSIE".
       *)
  | Work_around_backslash_bug
      (* There is a bug in many browsers: The backslash character is not
       * handled as an escaping character in MIME headers. Because DOS-
       * based systems use the backslash regularly in filenames, this bug
       * matters.
       * This option changes the interpretation of backslashes such that
       * these are handled as normal characters. I do not know any browser
       * that is not affected by this bug, so there is no check on
       * the user agent string.
       *)

type config =
    { maximum_content_length : int;
        (* The maximum CONTENT_LENGTH. Bigger requests trigger a
         * Resources_exceeded exception. This feature can be used
         * to detect primitive denial-of-service attacks.
         *)
      how_to_process_arguments : argument -> argument_processing;
        (* After the beginning of an argument has been decoded, the
         * type of processing is decided by invoking this function on
         * the argument. Note that the passed argument is incomplete -
         * it does not have a value. You can assume that name, filename,
         * MIME type and the whole header are already known.
         * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
         * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
         * All other transport methods can only handle the Memory
         * processing type.
         *)
      tmp_directory : string;
        (* The temporary directory to use for the temporary files. *)
      tmp_prefix : string;
        (* A prefix for temporary files. It is recommended that the prefix
         * contains a part that is random or that depends on rapidly changing
         * environment properties. For example, the process ID is a good
         * candidate, or the current system time. It is not required that
         * the prefix is unique; there is a fail-safe algorithm that
         * computes a unique file name from the prefix, even if several
         * CGI programs run concurrently.
         *)
      workarounds : workaround list;
        (* Specifies which workarounds should be enabled. *)
    }


val parse_arguments : config -> unit
val arguments : unit -> (string * argument) list
    (* - let () = parse_arguments config:
     *   Decodes the CGI arguments. 'config' specifies limits and processing
     *   hints; you can simply pass default_config (see below).
     *
     * - let arglist = arguments():
     *   The function returns a list with (name, arg) pairs. The name is
     *   passed back as string while the value is returned as opaque type
     *   'argument'. Accessor functions are defined below. These functions
     *   require that parse_arguments was invoked before.
     *
     * Note 1: You can invoke 'parse_arguments' several times, but only
     * the first time the arguments are read in. If you call the function
     * again, it does nothing (even if the config changes). This is also
     * true if 'parse_arguments' has been invoked after 'set_arguments'.
     *
     * Note 2: It is not guaranteed that stdin has been read until EOF.
     * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
     *
     * Note 3: If arguments are processed in File or Automatic mode, the
     * caller of 'parse_arguments' is responsible for deleting the files
     * after use. You may consider applying the at_exit function of the
     * core library for this purpose. See also 'cleanup' below.
     *)

val set_arguments : argument list -> unit
    (* Alternatively, you can set the arguments to use. This overrides any
     * previously parsed set of arguments, and also any following parsing.
     * - Intended for debugging, and to make it possible to replace the
     * CGI parser by a different one while retaining this API.
     *)

val default_config : config
    (* maximum_content_length = maxint
     * how_to_process_arguments = "use always Memory"
     * tmp_directory = "/var/tmp"
     * tmp_prefix = "cgi"
     * workarounds = [ Work_around_MSIE_Content_type_bug;
     *                 Work_around_backslash_bug;
     *               ]
     *
     * Note 1: On some Unixes, a special file system is used for /tmp that
     * stores the files into the virtual memory (main memory or swap area).
     * Because of this, /var/tmp is preferred as default.
     *
     * Note 2: Filename.temp_file is not used because it depends on
     * environment variables which are usually not set in a CGI environment.
     *)

val arg_name     : argument -> string
val arg_value    : argument -> string
val arg_file     : argument -> string option
val arg_mimetype : argument -> string
val arg_filename : argument -> string option
val arg_header   : argument -> (string * string) list
    (* The accessor functions that return several aspects of arguments.
     * arg_name: returns the name of the argument
     * arg_value: returns the value of the argument. If the value is stored
     *     in a temporary file, the contents of this file are returned, i.e.
     *     the file is loaded. This may have some consequences:
     *     (1) The function may fail because of I/O errors.
     *     (2) The function may be very slow, especially if the file is
     *         non-local.
     *     (3) If the value is bigger than Sys.max_string_length, the function
     *         raises the exception Resources_exceeded. On 32 bit architectures,
     *         strings are limited to 16 MB.
     *     Note that loaded values are put into weak arrays. This makes it
     *     possible that subsequent calls of 'arg_value' on the same argument
     *     can avoid loading the value again, and that unused values will
     *     nevertheless be collected by the GC.
     * arg_file: returns 'Some filename' if the value resides in a temporary
     *     file, and 'filename' is the absolute path of this file. If the
     *     value is only available in memory, None is returned.
     * arg_mimetype: returns the MIME type of the argument. Note that the
     *     default MIME type is "text/plain", and that the default is returned
     *     if the MIME type is not available.
     * arg_filename: returns 'Some filename' if the argument is associated
     *     with a certain filename (e.g. from a file upload); otherwise None
     * arg_header: returns pairs (name,value) containing the complete header
     *     of the argument. If the transmission protocol does not specify
     *     a header, the empty list is passed back.
     *)

val mk_simple_arg : name:string -> string -> argument
    (* mk_simple_arg ~name value:
     * Creates a simple argument that has only a name and a value, the
     * latter passed as string.
     * The MIME type is "text/plain".
     *)

val mk_memory_arg :
    name:string -> ?mime:string -> ?filename:string ->
    ?header:((string * string) list) -> string -> argument
    (* mk_memory_arg ~name ?mime ?filename ?header value:
     * Creates an argument whose value is kept in memory.
     * 'mime' is the MIME type, 'filename' the associated file name, and
     * 'header' the header of the argument; all three are optional.
     *
     * Note: The signature of this function changed in release 0.8.
     *)

val mk_file_arg :
    name:string -> ?mime:string -> ?filename:string ->
    ?header:((string * string) list) -> string -> argument
    (* mk_file_arg ~name ?mime ?filename ?header value_filename:
     * Creates an argument whose value is stored in the file
     * 'value_filename'. If this file name is not absolute, it is interpreted
     * relative to the directory returned by Sys.getcwd() - this might not
     * be what you want with respect to mount points and symlinks (and it
     * depends on the operating system as getcwd is only POSIX.1). The
     * file name is turned into an absolute name immediately, and the
     * function arg_file returns the rewritten name.
     *
     * Note: The signature of this function changed in release 0.8.
     *)


val cleanup : unit -> unit
    (* Removes all temporary files that occur in the current set of arguments
     * (as returned by 'arguments()').
     *)


(* Convenience functions: *)

val argument : string -> argument
    (* let argument name = List.assoc name (arguments()) -- i.e. returns
     * the argument with the passed name. Of course, this function expects
     * that 'parse_arguments' was called before.
     * NOTE(review): given the List.assoc definition above, a missing name
     * presumably raises Not_found - confirm against the implementation.
     *)

val argument_value : string -> string
    (* let argument_value name = arg_value(argument name) -- i.e. returns
     * the value of the argument.
     * See also Operators.( !$ ) below.
     *)

(* For toploop printers: *)

val print_argument : argument -> unit
    (* NOTE(review): presumably meant to be installed as a printer for
     * values of type 'argument' in the toploop; the output format is not
     * specified by this interface.
     *)


(* Now, the compatibility functions. *)

val parse_args : unit -> (string * string) list
    (* Decodes the arguments of the CGI and returns them as an association
     * list of (name, value) pairs.
     * Works whatever the method is (GET or POST).
     *)

val parse_args_with_mimetypes : unit -> (string * string * string) list
    (* Like parse_args, but returns also the MIME type.
     * The triples contain (name, mime_type, value).
     * If an encoding was chosen that does not transfer the MIME type,
     * "text/plain" is returned.
     *
     * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED.
     * It was included in netstring-0.4, but most people want more than
     * just the MIME type. parse_arguments should be used instead.
     *)

val header : string -> unit
    (* Prints the content-type header.
     * The argument is the MIME type (the default value is "text/html" if
     * the argument is the empty string).
     *)

val this_url : unit -> string
    (* Returns the address of the CGI *)


(**********************************************************************)
(* The Operators module                                               *)
(**********************************************************************)

(* If you open the Operators module, you can write
 *     !% "name"      instead of     argument "name", and
 *     !$ "name"      instead of     argument_value "name"
 *)

module Operators : sig
  val ( !% ) : string -> argument
      (* same as 'argument' above *)
  val ( !$ ) : string -> string
      (* same as 'argument_value' above *)
end


(**********************************************************************)
(* Low-level functions                                                *)
(**********************************************************************)

(* Encoding/Decoding within URLs:
 *
 * The following two functions perform the '%'-substitution for
 * characters that may otherwise be interpreted as metacharacters.
 *
 * See also the Netencoding module. This interface contains these functions
 * to keep the compatibility with the old Cgi module.
 *)

val decode : string -> string
val encode : string -> string

(* URL-encoded parameters:
 *
 * The following two functions create and analyze URL-encoded parameters.
 * Format: name1=val1&name2=val2&...
 *)

val mk_url_encoded_parameters : (string * string) list -> string
    (* The argument is a list of (name,value) pairs. The result is the
     * single URL-encoded parameter string.
     *)

val dest_url_encoded_parameters : string -> (string * string) list
    (* The argument is the URL-encoded parameter string. The result is
     * the corresponding list of (name,value) pairs.
     * Note: Whitespace within the parameter string is ignored.
     * If there is a format error, the function fails.
     *)

(* Form-encoded parameters:
 *
 * According to: RFCs 2388, 2183, 2045, 2046
 *
 * General note: This is a simple API to encode/decode form-encoded parameters.
 * Especially, it is not possible to pass the header of the parts through
 * this API.
 *)

val mk_form_encoded_parameters : (string * string * string) list ->
                                     (string * string)
    (* The argument is a list of (name,mimetype,value) triples.
     * The result is (parstr, boundary), where 'parstr' is the
     * single form-encoded parameter string, and 'boundary' is the
     * boundary to separate the message parts.
     *
     * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
     *)

val dest_form_encoded_parameters : string -> boundary:string -> config ->
                                       argument list
    (* The first argument is the form-encoded parameter string.
     * The second argument is the boundary (extracted from the mime type).
     * Third argument: Only the workarounds component is used.
     * The result is
     * the corresponding list of arguments (all in memory).
     * If there is a format error, the function fails.
     * Note: embedded multipart/mixed types are returned as they are,
     *   and are not recursively decoded.
     * Note: The content-transfer-encodings "7bit", "8bit", "binary",
     *   "base64", and "quoted-printable" are supported.
     * Note: Parameter names which include spaces or non-alphanumeric
     *   characters may be problematic (the rules of RFC 2047 are NOT applied).
     * Note: The returned MIME type is not normalized.
     *)

val dest_form_encoded_parameters_from_netstream
    : Netstream.t -> boundary:string -> config -> argument list
    (* let arglist = dest_form_encoded_parameters_from_netstream s b c:
     * Reads the form-encoded parameters from netstream s. The boundary
     * is passed in b, and the configuration in c.
     * A list of arguments is returned.
     *
     * See also dest_form_encoded_parameters.
     *
     * Restriction: In contrast to dest_form_encoded_parameters, this
     * function is not able to handle the content-transfer-encodings
     * "base64" and "quoted-printable". (This is not really a restriction
     * because no browser uses these encodings in conjunction with HTTP.
     * This is different if mail transport is chosen. - The reason for
     * this restriction is that there are currently no stream functions
     * for decoding.)
     *)

(* Private functions: *)

val init_mt : (unit -> unit) -> (unit -> unit) -> unit
    (* Not part of the public API.
     * NOTE(review): the name and the thread-safety remark below suggest
     * that the two callbacks are lock/unlock functions installed for
     * multi-threaded programs - confirm against the implementation.
     *)


(**********************************************************************)
(* Compatibility with CGI library by J.-C. Filliatre                  *)
(**********************************************************************)

(* The following functions are compatible with J.-C. Filliatre's CGI
 * library:
 *
 * parse_args, header, this_url, decode, encode.
 *
 * Note that the new implementation of parse_args can be safely invoked
 * several times.
 *
 * Since release 0.8, Netstring's CGI implementation is again thread-safe.
 *)


(* ======================================================================
 * History:
 *
 * $Log$
 * Revision 1.1  2000/11/17 09:57:27  lpadovan
 * Initial revision
 *
 * Revision 1.8  2000/06/25 22:34:43  gerd
 * 	Added labels to arguments.
 *
 * Revision 1.7  2000/06/25 21:40:36  gerd
 * 	Added printer.
 *
 * Revision 1.6  2000/06/25 21:15:48  gerd
 * 	Checked thread-safety.
 *
 * Revision 1.5  2000/05/16 22:28:13  gerd
 * 	New "workarounds" config component.
 *
 * Revision 1.4  2000/04/15 16:47:27  gerd
 * 	Last minor changes before releasing 0.6.
 *
 * Revision 1.3  2000/04/15 13:09:01  gerd
 * 	Implemented uploads to temporary files.
 *
 * Revision 1.2  2000/03/02 01:15:30  gerd
 * 	Updated.
 *
 * Revision 1.1  2000/02/25 15:21:12  gerd
 * 	Initial revision.
 *
 *
 *)