X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fcgi.mli;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fcgi.mli;h=8aea499d833609e8fb11f4a68896b99540b7924a;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/cgi.mli b/helm/DEVEL/pxp/netstring/cgi.mli new file mode 100644 index 000000000..8aea499d8 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/cgi.mli @@ -0,0 +1,419 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(* FOR SIMPLE CGI PROGRAMS: + * + * If you do not need all the features of the API below, the following may + * be enough: + * + * - At the beginning of the main program, call 'parse_arguments' with + * either 'default_config' as argument or with a customized configuration. + * - Use 'argument_value(name)' to get the string value of the CGI parameter + * 'name'. If you like, you can also open the Cgi.Operators module and + * write '!$ name' instead. Here, !$ is a prefix operator equivalent to + * argument_value. + * + * If you do not change the default configuration, you do not need to + * worry about temporary files - there are not any. + * + * Most of the other functions defined below deal with file uploads, and + * are only useful for that. + *) + + +(**********************************************************************) +(* CGI functions *) +(**********************************************************************) + +(* First, the general interface to the CGI argument parser. *) + +exception Resources_exceeded (* Raised when CONTENT_LENGTH exceeds maximum_content_length (see config), and by arg_value when a value is longer than Sys.max_string_length. *) + +type argument (* Opaque; inspect with the accessor functions (arg_name, arg_value, ...) below. *) + +type argument_processing = + Memory (* Keep the value of the argument in memory *) + | File (* Store the value of the argument into a temporary file *) + | Automatic (* Store only large arguments into files. An argument + * value is large if it is longer than about one block (4K). + * This is not an exact definition. 
+ *) + +type workaround = + Work_around_MSIE_Content_type_bug + (* There is a bug in MSIE I observed together with SSL connections. + * The CONTENT_TYPE passed to the server has sometimes the wrong + * format. This option enables a workaround if the user agent string + * contains the word "MSIE". + *) + | Work_around_backslash_bug + (* There is a bug in many browsers: The backslash character is not + * handled as an escaping character in MIME headers. Because DOS- + * based systems use the backslash regularly in filenames, this bug + * matters. + * This option changes the interpretation of backslashes such that + * these are handled as normal characters. I do not know any browser + * that is not affected by this bug, so there is no check on + * the user agent string. + *) + + +type config = + { maximum_content_length : int; + (* The maximum CONTENT_LENGTH. Bigger requests trigger a + * Resources_exceeded exception. This feature can be used + * to detect primitive denial-of-service attacks. + *) + how_to_process_arguments : argument -> argument_processing; + (* After the beginning of an argument has been decoded, the + * type of processing is decided by invoking this function on + * the argument. Note that the passed argument is incomplete - + * it does not have a value. You can assume that name, filename, + * MIME type and the whole header are already known. + * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS + * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION. + * All other transport methods can only handle the Memory + * processing type. + *) + tmp_directory : string; + (* The temporary directory to use for the temporary files. *) + tmp_prefix : string; + (* A prefix for temporary files. It is recommended that the prefix + * contains a part that is random or that depends on rapidly changing + * environment properties. For example, the process ID is a good + * candidate, or the current system time. 
It is not required that + * the prefix is unique; there is a fail-safe algorithm that + * computes a unique file name from the prefix, even if several + * CGI programs run concurrently. + *) + workarounds : workaround list; + (* Specifies which workarounds should be enabled. *) + } + +val parse_arguments : config -> unit +val arguments : unit -> (string * argument) list + (* - let () = parse_arguments config: + * Decodes the CGI arguments. 'config' specifies limits and processing + * hints; you can simply pass default_config (see below). + * + * - let arglist = arguments(): + * The function returns a list with (name, arg) pairs. The name is + * passed back as string while the value is returned as opaque type + * 'argument'. Below accessor functions are defined. These functions + * require that parse_arguments was invoked before. + * + * Note 1: You can invoke 'parse_arguments' several times, but only + * the first time the arguments are read in. If you call the function + * again, it does nothing (even if the config changes). This is also + * true if 'parse_arguments' has been invoked after 'set_arguments'. + * + * Note 2: It is not guaranteed that stdin has been read until EOF. + * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec). + * + * Note 3: If arguments are processed in File or Automatic mode, the + * caller of 'parse_arguments' is responsible for deleting the files + * after use. You may want to use the at_exit function of the + * core library for this purpose. See also 'cleanup' below. + *) + +val set_arguments : argument list -> unit + (* Alternatively, you can set the arguments to use. This overrides any + * previously parsed set of arguments, and also any following parsing. + * - Intended for debugging, and to make it possible to replace the + * CGI parser by a different one while retaining this API. 
+ *) + +val default_config : config + (* maximum_content_length = maxint + * how_to_process_arguments = "always use Memory" + * tmp_directory = "/var/tmp" + * tmp_prefix = "cgi" + * workarounds = [ Work_around_MSIE_Content_type_bug; + * Work_around_backslash_bug; + * ] + * + * Note 1: On some Unixes, a special file system is used for /tmp that + * stores the files into the virtual memory (main memory or swap area). + * Because of this, /var/tmp is preferred as default. + * + * Note 2: Filename.temp_file is not used because it depends on + * environment variables which are usually not set in a CGI environment. + *) + +val arg_name : argument -> string +val arg_value : argument -> string +val arg_file : argument -> string option +val arg_mimetype : argument -> string +val arg_filename : argument -> string option +val arg_header : argument -> (string * string) list + (* The accessor functions that return several aspects of arguments. + * arg_name: returns the name of the argument + * arg_value: returns the value of the argument. If the value is stored + * in a temporary file, the contents of this file are returned, i.e. + * the file is loaded. This may have some consequences: + * (1) The function may fail because of I/O errors. + * (2) The function may be very slow, especially if the file is + * non-local. + * (3) If the value is bigger than Sys.max_string_length, the function + * raises the exception Resources_exceeded. On 32 bit architectures, + * strings are limited to 16 MB. + * Note that loaded values are put into weak arrays. This makes it + * possible that subsequent calls of 'arg_value' on the same argument + * can avoid loading the value again, and that unused values will + * nevertheless be collected by the GC. + * arg_file: returns 'Some filename' if the value resides in a temporary + * file, and 'filename' is the absolute path of this file. If the + * value is only available in memory, None is returned. 
+ * arg_mimetype: returns the MIME type of the argument. Note that the + * default MIME type is "text/plain", and that the default is returned + * if the MIME type is not available. + * arg_filename: returns 'Some filename' if the argument is associated + * with a certain filename (e.g. from a file upload); otherwise None + * arg_header: returns pairs (name,value) containing the complete header + * of the argument. If the transmission protocol does not specify + * a header, the empty list is passed back. + *) + +val mk_simple_arg : name:string -> string -> argument + (* mk_simple_arg name value: + * Creates a simple argument with only name, and a value passed by string. + * The MIME type is "text/plain". + *) + +val mk_memory_arg + : name:string -> ?mime:string -> ?filename:string -> + ?header:((string * string) list) -> string -> argument + (* mk_memory_arg name mimetype filename header value: + * Creates an argument whose value is kept in memory. + * + * Note: The signature of this function changed in release 0.8. + *) + +val mk_file_arg + : name:string -> ?mime:string -> ?filename:string -> + ?header:((string * string) list) -> string -> argument + (* mk_file_arg name mimetype filename header value_filename: + * Creates an argument whose value is stored in the file + * 'value_filename'. If this file name is not absolute, it is interpreted + * relative to the directory returned by Sys.getcwd() - this might not + * be what you want with respect to mount points and symlinks (and it + * depends on the operating system as getcwd is only POSIX.1). The + * file name is turned into an absolute name immediately, and the + * function arg_file returns the rewritten name. + * + * Note: The signature of this function changed in release 0.8. + *) + + +val cleanup : unit -> unit + (* Removes all temporary files that occur in the current set of arguments + * (as returned by 'arguments()'). 
+ *) + + +(* Convenience functions: *) + +val argument : string -> argument + (* let argument name = List.assoc name (arguments()) -- i.e. returns + * the argument with the passed name. Of course, this function expects + * that 'parse_arguments' was called before. + *) + +val argument_value : string -> string + (* let argument_value name = arg_value(argument name) -- i.e. returns + * the value of the argument. + * See also Operators.( !$ ) below. + *) + +(* For toploop printers: *) + +val print_argument : argument -> unit + + +(* Now, the compatibility functions. *) + +val parse_args : unit -> (string * string) list + (* Decodes the arguments of the CGI and returns them as an association list + * Works whatever the method is (GET or POST) + *) + +val parse_args_with_mimetypes : unit -> (string * string * string) list + (* Like parse_args, but returns also the MIME type. + * The triples contain (name, mime_type, value). + * If an encoding was chosen that does not transfer the MIME type, + * "text/plain" is returned. + * + * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED. + * It was included in netstring-0.4, but most people want more than just + * the MIME type. parse_arguments should be used instead. + *) + +val header : string -> unit + (* Prints the content-type header. 
+ * the argument is the MIME type (default value is "text/html" if the + * argument is the empty string) + *) + +val this_url : unit -> string + (* Returns the address of the CGI *) + +(**********************************************************************) +(* The Operators module *) +(**********************************************************************) + +(* If you open the Operators module, you can write + * !% "name" instead of argument "name", and + * !$ "name" instead of argument_value "name" + *) + +module Operators : sig + val ( !% ) : string -> argument + (* same as 'argument' above *) + val ( !$ ) : string -> string + (* same as 'argument_value' above *) +end + +(**********************************************************************) +(* Low-level functions *) +(**********************************************************************) + +(* Encoding/Decoding within URLs: + * + * The following two functions perform the '%'-substitution for + * characters that may otherwise be interpreted as metacharacters. + * + * See also the Netencoding module. This interface contains these functions + * to keep the compatibility with the old Cgi module. + *) + +val decode : string -> string +val encode : string -> string + +(* URL-encoded parameters: + * + * The following two functions create and analyze URL-encoded parameters. + * Format: name1=val1&name2=val2&... + *) + +val mk_url_encoded_parameters : (string * string) list -> string + (* The argument is a list of (name,value) pairs. The result is the + * single URL-encoded parameter string. + *) + +val dest_url_encoded_parameters : string -> (string * string) list + (* The argument is the URL-encoded parameter string. The result is + * the corresponding list of (name,value) pairs. + * Note: Whitespace within the parameter string is ignored. + * If there is a format error, the function fails. 
+ *) + +(* Form-encoded parameters: + * + * According to: RFCs 2388, 2183, 2045, 2046 + * + * General note: This is a simple API to encode/decode form-encoded parameters. + * Especially, it is not possible to pass the header of the parts through + * this API. + *) + +val mk_form_encoded_parameters : (string * string * string) list -> + (string * string) + (* The argument is a list of (name,mimetype,value) triples. + * The result is (parstr, boundary), where 'parstr' is the + * single form-encoded parameter string, and 'boundary' is the + * boundary to separate the message parts. + * + * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED! + *) + +val dest_form_encoded_parameters : string -> boundary:string -> config -> + argument list + (* The first argument is the form-encoded parameter string. + * The second argument is the boundary (extracted from the mime type). + * Third argument: Only the workarounds component is used. + * The result is + * the corresponding list of arguments (all in memory). + * If there is a format error, the function fails. + * Note: embedded multipart/mixed types are returned as they are, + * and are not recursively decoded. + * Note: The content-transfer-encodings "7bit", "8bit", "binary", + * "base64", and "quoted-printable" are supported. + * Note: Parameter names which include spaces or non-alphanumeric + * characters may be problematic (the rules of RFC 2047 are NOT applied). + * Note: The returned MIME type is not normalized. + *) + +val dest_form_encoded_parameters_from_netstream + : Netstream.t -> boundary:string -> config -> argument list + (* let arglist = dest_form_encoded_parameters_from_netstream s b c: + * Reads the form-encoded parameters from netstream s. The boundary + * is passed in b, and the configuration in c. + * A list of arguments is returned. + * + * See also dest_form_encoded_parameters. 
+ * + * Restriction: In contrast to dest_form_encoded_parameters, this + * function is not able to handle the content-transfer-encodings + * "base64" and "quoted-printable". (This is not really a restriction + * because no browser uses these encodings in conjunction with HTTP. + * This is different if mail transport is chosen. - The reason for + * this restriction is that there are currently no stream functions + * for decoding.) + *) + +(* Private functions: *) + +val init_mt : (unit -> unit) -> (unit -> unit) -> unit (* NOTE(review): undocumented here; presumably installs the lock/unlock callbacks used for thread-safety - confirm against cgi.ml. *) + + +(**********************************************************************) +(* Compatibility with CGI library by J.-C. Filliatre *) +(**********************************************************************) + +(* The following functions are compatible with J.-C. Filliatre's CGI + * library: + * + * parse_args, header, this_url, decode, encode. + * + * Note that the new implementation of parse_args can be safely invoked + * several times. + * + * Since release 0.8, Netstring's CGI implementation is again thread-safe. + *) + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.8 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.7 2000/06/25 21:40:36 gerd + * Added printer. + * + * Revision 1.6 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.5 2000/05/16 22:28:13 gerd + * New "workarounds" config component. + * + * Revision 1.4 2000/04/15 16:47:27 gerd + * Last minor changes before releasing 0.6. + * + * Revision 1.3 2000/04/15 13:09:01 gerd + * Implemented uploads to temporary files. + * + * Revision 1.2 2000/03/02 01:15:30 gerd + * Updated. + * + * Revision 1.1 2000/02/25 15:21:12 gerd + * Initial revision. 
+ * + * + *)