--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* FOR SIMPLE CGI PROGRAMS:
+ *
+ * If you do not need all the features of the API below, the following may
+ * be enough:
+ *
+ * - At the beginning of the main program, call 'parse_arguments' with
+ * either 'default_config' as argument or with a customized configuration.
+ * - Use 'argument_value(name)' to get the string value of the CGI parameter
+ * 'name'. If you like, you can also open the Cgi.Operators module and
+ * write '!$ name' instead. Here, !$ is a prefix operator equivalent to
+ * argument_value.
+ *
+ * If you do not change the default configuration, you do not need to
+ * worry about temporary files - there are not any.
+ *
+ * Most of the other functions defined below deal with file uploads, and
+ * are only useful for that.
+ *)
+
+
+(**********************************************************************)
+(* CGI functions *)
+(**********************************************************************)
+
+(* First, the general interface to the CGI argument parser. *)
+
+exception Resources_exceeded (* Raised when a size limit is exceeded: when
+ * the request is bigger than 'maximum_content_length' (see 'config'), or
+ * when 'arg_value' would have to return a string longer than
+ * Sys.max_string_length.
+ *)
+
+type argument (* Opaque representation of a single CGI argument. Use the
+ * arg_name, arg_value, ... accessor functions to inspect it, and the
+ * mk_simple_arg, mk_memory_arg, mk_file_arg functions to create one.
+ *)
+
+type argument_processing = (* Where the value of an argument is stored: *)
+ Memory (* Keep the value of the argument in memory *)
+ | File (* Store the value of the argument into a temporary file *)
+ | Automatic (* Store only large arguments into files. An argument
+ * value counts as large if it is longer than about one block (4K).
+ * This threshold is approximate, not an exact definition.
+ *)
+
+type workaround = (* Workarounds for browser bugs; see 'workarounds' in 'config' *)
+ Work_around_MSIE_Content_type_bug
+ (* There is a bug in MSIE I observed together with SSL connections.
+ * The CONTENT_TYPE passed to the server sometimes has the wrong
+ * format. This option enables a workaround if the user agent string
+ * contains the word "MSIE".
+ *)
+ | Work_around_backslash_bug
+ (* There is a bug in many browsers: The backslash character is not
+ * handled as an escaping character in MIME headers. Because DOS-
+ * based systems use the backslash regularly in filenames, this bug
+ * matters.
+ * This option changes the interpretation of backslashes such that
+ * they are handled as normal characters. I do not know of any browser
+ * that is not affected by this bug, so there is no check on
+ * the user agent string.
+ *)
+
+
+type config = (* Limits and processing hints passed to parse_arguments *)
+ { maximum_content_length : int;
+ (* The maximum CONTENT_LENGTH. Bigger requests trigger a
+ * Resources_exceeded exception. This feature can be used
+ * to detect primitive denial-of-service attacks.
+ *)
+ how_to_process_arguments : argument -> argument_processing;
+ (* After the beginning of an argument has been decoded, the
+ * type of processing is decided by invoking this function on
+ * the argument. Note that the passed argument is incomplete -
+ * it does not have a value. You can assume that name, filename,
+ * MIME type and the whole header are already known.
+ * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
+ * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
+ * All other transport methods can only handle the Memory
+ * processing type.
+ *)
+ tmp_directory : string;
+ (* The directory in which the temporary files are created. *)
+ tmp_prefix : string;
+ (* A prefix for the names of temporary files. It is recommended that
+ * the prefix contains a part that is random or that depends on rapidly
+ * changing environment properties. For example, the process ID is a
+ * good candidate, or the current system time. It is not required that
+ * the prefix is unique; there is a fail-safe algorithm that
+ * computes a unique file name from the prefix, even if several
+ * CGI programs run concurrently.
+ *)
+ workarounds : workaround list;
+ (* Specifies which workarounds should be enabled. *)
+ }
+
+val parse_arguments : config -> unit
+val arguments : unit -> (string * argument) list
+ (* - let () = parse_arguments config:
+ * Decodes the CGI arguments. 'config' specifies limits and processing
+ * hints; you can simply pass default_config (see below).
+ *
+ * - let arglist = arguments():
+ * The function returns a list of (name, arg) pairs. The name is
+ * passed back as string while the value is returned as opaque type
+ * 'argument'. Accessor functions are defined below. These functions
+ * require that parse_arguments was invoked before.
+ *
+ * Note 1: You can invoke 'parse_arguments' several times, but the
+ * arguments are read in only the first time. If you call the function
+ * again, it does nothing (even if the config changes). This is also
+ * true if 'parse_arguments' has been invoked after 'set_arguments'.
+ *
+ * Note 2: It is not guaranteed that stdin has been read until EOF.
+ * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
+ *
+ * Note 3: If arguments are processed in File or Automatic mode, the
+ * caller of 'parse_arguments' is responsible for deleting the files
+ * after use. Consider using the at_exit function of the
+ * core library for this purpose. See also 'cleanup' below.
+ *)
+
+val set_arguments : argument list -> unit
+ (* Alternatively, you can set the arguments to use. This overrides any
+ * previously parsed set of arguments, and also any following parsing
+ * (a later call of 'parse_arguments' does nothing).
+ * - Intended for debugging, and to make it possible to replace the
+ * CGI parser by a different one while retaining this API.
+ *)
+
+val default_config : config
+ (* The default configuration:
+ * maximum_content_length = max_int
+ * how_to_process_arguments = "always use Memory"
+ * tmp_directory = "/var/tmp"
+ * tmp_prefix = "cgi"
+ * workarounds = [ Work_around_MSIE_Content_type_bug;
+ * Work_around_backslash_bug;
+ * ]
+ *
+ * Note 1: On some Unixes, a special file system is used for /tmp that
+ * stores the files into the virtual memory (main memory or swap area).
+ * Because of this, /var/tmp is preferred as default.
+ *
+ * Note 2: Filename.temp_file is not used because it depends on
+ * environment variables which are usually not set in a CGI environment.
+ *)
+
+val arg_name : argument -> string
+val arg_value : argument -> string
+val arg_file : argument -> string option
+val arg_mimetype : argument -> string
+val arg_filename : argument -> string option
+val arg_header : argument -> (string * string) list
+ (* The accessor functions that return the various aspects of an argument.
+ * arg_name: returns the name of the argument.
+ * arg_value: returns the value of the argument. If the value is stored
+ * in a temporary file, the contents of this file are returned, i.e.
+ * the file is loaded. This may have some consequences:
+ * (1) The function may fail because of I/O errors.
+ * (2) The function may be very slow, especially if the file is
+ * non-local.
+ * (3) If the value is bigger than Sys.max_string_length, the function
+ * raises the exception Resources_exceeded. On 32 bit architectures,
+ * strings are limited to 16 MB.
+ * Note that loaded values are put into weak arrays. Because of this,
+ * subsequent calls of 'arg_value' on the same argument
+ * can avoid loading the value again, while unused values can
+ * nevertheless be collected by the GC.
+ * arg_file: returns 'Some filename' if the value resides in a temporary
+ * file, and 'filename' is the absolute path of this file. If the
+ * value is only available in memory, None is returned.
+ * arg_mimetype: returns the MIME type of the argument. Note that the
+ * default MIME type is "text/plain", and that the default is returned
+ * if the MIME type is not available.
+ * arg_filename: returns 'Some filename' if the argument is associated
+ * with a certain filename (e.g. from a file upload); otherwise None.
+ * arg_header: returns pairs (name,value) containing the complete header
+ * of the argument. If the transmission protocol does not specify
+ * a header, the empty list is passed back.
+ *)
+
+val mk_simple_arg : name:string -> string -> argument
+ (* mk_simple_arg ~name value:
+ * Creates a simple argument that has only a name and a value; the value
+ * is passed as string. The MIME type is "text/plain".
+ *)
+
+val mk_memory_arg
+ : name:string -> ?mime:string -> ?filename:string ->
+ ?header:((string * string) list) -> string -> argument
+ (* mk_memory_arg ~name ?mime ?filename ?header value:
+ * Creates an argument whose value is kept in memory. The optional
+ * arguments set the MIME type, the associated filename, and the header
+ * of the new argument.
+ *
+ * Note: The signature of this function changed in release 0.8.
+ *)
+
+val mk_file_arg
+ : name:string -> ?mime:string -> ?filename:string ->
+ ?header:((string * string) list) -> string -> argument
+ (* mk_file_arg ~name ?mime ?filename ?header value_filename:
+ * Creates an argument whose value is stored in the file
+ * 'value_filename'. The optional arguments set the MIME type, the
+ * associated filename, and the header of the new argument.
+ * If 'value_filename' is not absolute, it is interpreted
+ * relative to the directory returned by Sys.getcwd() - this might not
+ * be what you want with respect to mount points and symlinks (and it
+ * depends on the operating system as getcwd is only POSIX.1). The
+ * file name is turned into an absolute name immediately, and the
+ * function arg_file returns the rewritten name.
+ *
+ * Note: The signature of this function changed in release 0.8.
+ *)
+
+
+val cleanup : unit -> unit
+ (* Removes all temporary files that occur in the current set of arguments
+ * (as returned by 'arguments()'). See Note 3 of 'parse_arguments' about
+ * who is responsible for deleting these files.
+ *)
+
+
+(* Convenience functions: *)
+
+val argument : string -> argument
+ (* let argument name = List.assoc name (arguments()) -- i.e. returns
+ * the argument with the passed name. Of course, this function expects
+ * that 'parse_arguments' was called before.
+ * NOTE(review): given the List.assoc definition, a name that does not
+ * occur presumably raises Not_found - confirm against the implementation.
+ *)
+
+val argument_value : string -> string
+ (* let argument_value name = arg_value(argument name) -- i.e. returns
+ * the value of the argument with the passed name.
+ * See also Operators.( !$ ) below.
+ *)
+
+(* For toploop printers: *)
+
+val print_argument : argument -> unit (* Printer for values of type
+ * 'argument', meant to be installed as a printer in the toploop.
+ *)
+
+
+(* Now, the compatibility functions. *)
+
+val parse_args : unit -> (string * string) list
+ (* Decodes the arguments of the CGI and returns them as an association
+ * list of (name, value) pairs.
+ * Works whatever the method is (GET or POST).
+ *)
+
+val parse_args_with_mimetypes : unit -> (string * string * string) list
+ (* Like parse_args, but also returns the MIME type.
+ * The triples contain (name, mime_type, value).
+ * If an encoding was chosen that does not transfer the MIME type,
+ * "text/plain" is returned.
+ *
+ * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED.
+ * It was included in netstring-0.4, but most people need more than
+ * just the MIME type. parse_arguments should be used instead.
+ *)
+
+val header : string -> unit
+ (* Prints the content-type header.
+ * The argument is the MIME type; if the argument is the empty string,
+ * the default value "text/html" is used.
+ *)
+
+val this_url : unit -> string
+ (* Returns the address (URL) of the CGI program. *)
+
+(**********************************************************************)
+(* The Operators module *)
+(**********************************************************************)
+
+(* If you open the Operators module, you can write
+ * !% "name" instead of argument "name", and
+ * !$ "name" instead of argument_value "name"
+ *)
+
+module Operators : sig (* Prefix-operator shortcuts for the convenience functions *)
+ val ( !% ) : string -> argument
+ (* same as 'argument' above: !% name returns the argument 'name' *)
+ val ( !$ ) : string -> string
+ (* same as 'argument_value' above: !$ name returns the value of 'name' *)
+end
+
+(**********************************************************************)
+(* Low-level functions *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * See also the Netencoding module. This interface contains these functions
+ * to keep the compatibility with the old Cgi module.
+ *)
+
+val decode : string -> string (* Reverts the '%'-substitution *)
+val encode : string -> string (* Applies the '%'-substitution *)
+
+(* URL-encoded parameters:
+ *
+ * The following two functions create and analyze URL-encoded parameters.
+ * Format: name1=val1&name2=val2&...
+ *)
+
+val mk_url_encoded_parameters : (string * string) list -> string
+ (* The argument is a list of (name,value) pairs. The result is the
+ * single URL-encoded parameter string in the format
+ * name1=val1&name2=val2&...
+ *)
+
+val dest_url_encoded_parameters : string -> (string * string) list
+ (* Analyzes a string as created by mk_url_encoded_parameters:
+ * The argument is the URL-encoded parameter string. The result is
+ * the corresponding list of (name,value) pairs.
+ * Note: Whitespace within the parameter string is ignored.
+ * If there is a format error, the function fails.
+ *)
+
+(* Form-encoded parameters:
+ *
+ * According to: RFCs 2388, 2183, 2045, 2046
+ *
+ * General note: This is a simple API to encode/decode form-encoded parameters.
+ * Especially, it is not possible to pass the header of the parts through
+ * this API.
+ *)
+
+val mk_form_encoded_parameters : (string * string * string) list ->
+ (string * string)
+ (* The argument is a list of (name,mimetype,value) triples.
+ * The result is (parstr, boundary), where 'parstr' is the
+ * single form-encoded parameter string, and 'boundary' is the
+ * boundary string that separates the message parts.
+ *
+ * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
+ *)
+
+val dest_form_encoded_parameters : string -> boundary:string -> config ->
+ argument list
+ (* dest_form_encoded_parameters parstr ~boundary config:
+ * - 'parstr' is the form-encoded parameter string.
+ * - 'boundary' is the boundary (extracted from the MIME type).
+ * - 'config': Only the workarounds component is used.
+ * The result is
+ * the corresponding list of arguments (all in memory).
+ * If there is a format error, the function fails.
+ * Note: embedded multipart/mixed types are returned as they are,
+ * and are not recursively decoded.
+ * Note: The content-transfer-encodings "7bit", "8bit", "binary",
+ * "base64", and "quoted-printable" are supported.
+ * Note: Parameter names which include spaces or non-alphanumeric
+ * characters may be problematic (the rules of RFC 2047 are NOT applied).
+ * Note: The returned MIME type is not normalized.
+ *)
+
+val dest_form_encoded_parameters_from_netstream
+ : Netstream.t -> boundary:string -> config -> argument list
+ (* let arglist = dest_form_encoded_parameters_from_netstream s ~boundary:b c:
+ * Reads the form-encoded parameters from netstream s. The boundary
+ * is passed in b, and the configuration in c.
+ * A list of arguments is returned.
+ *
+ * See also dest_form_encoded_parameters.
+ *
+ * Restriction: In contrast to dest_form_encoded_parameters, this
+ * function is not able to handle the content-transfer-encodings
+ * "base64" and "quoted-printable". (This is not really a restriction
+ * because no browser uses these encodings in conjunction with HTTP.
+ * This is different if mail transport is chosen. - The reason for
+ * this restriction is that there are currently no stream functions
+ * for decoding.)
+ *)
+
+(* Private functions: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit (* Private; not
+ * meant to be called by users of this module.
+ * NOTE(review): the name suggests that the two callbacks are hooks for
+ * multi-threaded use (e.g. lock/unlock) - confirm against the
+ * implementation before relying on this.
+ *)
+
+
+(**********************************************************************)
+(* Compatibility with CGI library by J.-C. Filliatre *)
+(**********************************************************************)
+
+(* The following functions are compatible with J.-C. Filliatre's CGI
+ * library:
+ *
+ * parse_args, header, this_url, decode, encode.
+ *
+ * Note that the new implementation of parse_args can be safely invoked
+ * several times.
+ *
+ * Since release 0.8, Netstring's CGI implementation is again thread-safe.
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.7 2000/06/25 21:40:36 gerd
+ * Added printer.
+ *
+ * Revision 1.6 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.5 2000/05/16 22:28:13 gerd
+ * New "workarounds" config component.
+ *
+ * Revision 1.4 2000/04/15 16:47:27 gerd
+ * Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)