2 * ----------------------------------------------------------------------
6 (* FOR SIMPLE CGI PROGRAMS:
8 * If you do not need all the features of the API below, the following may
11 * - At the beginning of the main program, call 'parse_arguments' with
12 * either 'default_config' as argument or with a customized configuration.
13 * - Use 'argument_value(name)' to get the string value of the CGI parameter
14 * 'name'. If you like, you can also open the Cgi.Operators module and
15 * write '!$ name' instead. Here, !$ is a prefix operator equivalent to
18 * If you do not change the default configuration, you do not need to
19 * worry about temporary files - there are not any.
21 * Most of the other functions defined below deal with file uploads, and
22 * are only useful for that.
26 (**********************************************************************)
28 (**********************************************************************)
30 (* First, the general interface to the CGI argument parser. *)
32 exception Resources_exceeded
36 type argument_processing =
37 Memory (* Keep the value of the argument in memory *)
38 | File (* Store the value of the argument into a temporary file *)
39 | Automatic (* Store only large arguments into files. An argument
40 * value is large if it is longer than about one block (4K).
41 * This is not an exact definition.
45 Work_around_MSIE_Content_type_bug
46 (* There is a bug in MSIE I observed together with SSL connections.
47 * The CONTENT_TYPE passed to the server sometimes has the wrong
48 * format. This option enables a workaround if the user agent string
49 * contains the word "MSIE".
51 | Work_around_backslash_bug
52 (* There is a bug in many browsers: The backslash character is not
53 * handled as an escaping character in MIME headers. Because DOS-
54 * based systems use the backslash regularly in filenames, this bug
56 * This option changes the interpretation of backslashes such that
57 * these are handled as normal characters. I do not know any browser
58 * that is not affected by this bug, so there is no check on
59 * the user agent string.
64 { maximum_content_length : int;
65 (* The maximum CONTENT_LENGTH. Bigger requests trigger a
66 * Resources_exceeded exception. This feature can be used
67 * to detect primitive denial-of-service attacks.
69 how_to_process_arguments : argument -> argument_processing;
70 (* After the beginning of an argument has been decoded, the
71 * type of processing is decided by invoking this function on
72 * the argument. Note that the passed argument is incomplete -
73 * it does not have a value. You can assume that name, filename,
74 * MIME type and the whole header are already known.
75 * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
76 * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
77 * All other transport methods can only handle the Memory
80 tmp_directory : string;
81 (* The temporary directory to use for the temporary files. *)
83 (* A prefix for temporary files. It is recommended that the prefix
84 * contains a part that is random or that depends on rapidly changing
85 * environment properties. For example, the process ID is a good
86 * candidate, or the current system time. It is not required that
87 * the prefix is unique; there is a fail-safe algorithm that
88 * computes a unique file name from the prefix, even if several
89 * CGI programs run concurrently.
91 workarounds : workaround list;
92 (* Specifies which workarounds should be enabled. *)
95 val parse_arguments : config -> unit
96 val arguments : unit -> (string * argument) list
97 (* - let () = parse_arguments config:
98 * Decodes the CGI arguments. 'config' specifies limits and processing
99 * hints; you can simply pass default_config (see below).
101 * - let arglist = arguments():
102 * The function returns a list with (name, arg) pairs. The name is
103 * passed back as string while the value is returned as opaque type
104 * 'argument'. Accessor functions are defined below. These functions
105 * require that parse_arguments was invoked before.
107 * Note 1: You can invoke 'parse_arguments' several times, but only
108 * the first time the arguments are read in. If you call the function
109 * again, it does nothing (even if the config changes). This is also
110 * true if 'parse_arguments' has been invoked after 'set_arguments'.
112 * Note 2: It is not guaranteed that stdin has been read until EOF.
113 * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
115 * Note 3: If arguments are processed in File or Automatic mode, the
116 * caller of 'parse_arguments' is responsible for deleting the files
117 * after use. Consider using the at_exit function of the
118 * core library for this purpose. See also 'cleanup' below.
121 val set_arguments : argument list -> unit
122 (* Alternatively, you can set the arguments to use. This overrides any
123 * previously parsed set of arguments, and also any following parsing.
124 * - Intended for debugging, and to make it possible to replace the
125 * CGI parser by a different one while retaining this API.
128 val default_config : config
129 (* maximum_content_length = maxint
130 * how_to_process_arguments = "always use Memory"
131 * tmp_directory = "/var/tmp"
133 * workarounds = [ Work_around_MSIE_Content_type_bug;
134 * Work_around_backslash_bug;
137 * Note 1: On some Unixes, a special file system is used for /tmp that
138 * stores the files into the virtual memory (main memory or swap area).
139 * Because of this, /var/tmp is preferred as default.
141 * Note 2: Filename.temp_file is not used because it depends on
142 * environment variables which are usually not set in a CGI environment.
145 val arg_name : argument -> string
146 val arg_value : argument -> string
147 val arg_file : argument -> string option
148 val arg_mimetype : argument -> string
149 val arg_filename : argument -> string option
150 val arg_header : argument -> (string * string) list
151 (* The accessor functions that return several aspects of arguments.
152 * arg_name: returns the name of the argument
153 * arg_value: returns the value of the argument. If the value is stored
154 * in a temporary file, the contents of this file are returned, i.e.
155 * the file is loaded. This may have some consequences:
156 * (1) The function may fail because of I/O errors.
157 * (2) The function may be very slow, especially if the file is
159 * (3) If the value is bigger than Sys.max_string_length, the function
160 * raises the exception Resources_exceeded. On 32 bit architectures,
161 * strings are limited to 16 MB.
162 * Note that loaded values are put into weak arrays. This makes it
163 * possible that subsequent calls of 'arg_value' on the same argument
164 * can avoid loading the value again, and that unused values will
165 * nevertheless be collected by the GC.
166 * arg_file: returns 'Some filename' if the value resides in a temporary
167 * file, and 'filename' is the absolute path of this file. If the
168 * value is only available in memory, None is returned.
169 * arg_mimetype: returns the MIME type of the argument. Note that the
170 * default MIME type is "text/plain", and that the default is returned
171 * if the MIME type is not available.
172 * arg_filename: returns 'Some filename' if the argument is associated
173 * with a certain filename (e.g. from a file upload); otherwise None
174 * arg_header: returns pairs (name,value) containing the complete header
175 * of the argument. If the transmission protocol does not specify
176 * a header, the empty list is passed back.
179 val mk_simple_arg : name:string -> string -> argument
180 (* mk_simple_arg name value:
181 * Creates a simple argument with only name, and a value passed by string.
182 * The MIME type is "text/plain".
186 : name:string -> ?mime:string -> ?filename:string ->
187 ?header:((string * string) list) -> string -> argument
188 (* mk_memory_arg name mimetype filename header value:
189 * Creates an argument whose value is kept in memory.
191 * Note: The signature of this function changed in release 0.8.
195 : name:string -> ?mime:string -> ?filename:string ->
196 ?header:((string * string) list) -> string -> argument
197 (* mk_file_arg name mimetype filename header value_filename:
198 * Creates an argument whose value is stored in the file
199 * 'value_filename'. If this file name is not absolute, it is interpreted
200 * relative to the directory returned by Sys.getcwd() - this might not
201 * be what you want with respect to mount points and symlinks (and it
202 * depends on the operating system as getcwd is only POSIX.1). The
203 * file name is turned into an absolute name immediately, and the
204 * function arg_file returns the rewritten name.
206 * Note: The signature of this function changed in release 0.8.
210 val cleanup : unit -> unit
211 (* Removes all temporary files that occur in the current set of arguments
212 * (as returned by 'arguments()').
216 (* Convenience functions: *)
218 val argument : string -> argument
219 (* let argument name = List.assoc name (arguments()) -- i.e. returns
220 * the argument with the passed name. Of course, this function expects
221 * that 'parse_arguments' was called before.
224 val argument_value : string -> string
225 (* let argument_value name = arg_value(argument name) -- i.e. returns
226 * the value of the argument.
227 * See also Operators.( !$ ) below.
230 (* For toploop printers: *)
232 val print_argument : argument -> unit
(* Prints the given argument. Per the section heading, it is meant to be
 * used as a printer for values of type 'argument' in the toploop.
 * NOTE(review): the output format and destination are not specified in
 * this interface -- check the implementation.
 *)
235 (* Now, the compatibility functions. *)
237 val parse_args : unit -> (string * string) list
238 (* Decodes the arguments of the CGI and returns them as an association list.
239 * Works regardless of the request method (GET or POST).
242 val parse_args_with_mimetypes : unit -> (string * string * string) list
243 (* Like parse_args, but returns also the MIME type.
244 * The triples contain (name, mime_type, value).
245 * If an encoding was chosen that does not transfer the MIME type,
246 * "text/plain" is returned.
248 * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED.
249 * It was included in netstring-0.4, but most callers need more than
250 * just the MIME type; parse_arguments should be used instead.
253 val header : string -> unit
254 (* Prints the content-type header.
255 * The argument is the MIME type (default value is "text/html" if the
256 * argument is the empty string)
259 val this_url : unit -> string
260 (* Returns the address of the CGI *)
262 (**********************************************************************)
263 (* The Operators module *)
264 (**********************************************************************)
266 (* If you open the Operators module, you can write
267 * !% "name" instead of argument "name", and
268 * !$ "name" instead of argument_value "name"
271 module Operators : sig
272 val ( !% ) : string -> argument
273 (* same as 'argument' above *)
274 val ( !$ ) : string -> string
275 (* same as 'argument_value' above *)
278 (**********************************************************************)
279 (* Low-level functions *)
280 (**********************************************************************)
282 (* Encoding/Decoding within URLs:
284 * The following two functions perform the '%'-substitution for
285 * characters that may otherwise be interpreted as metacharacters.
287 * See also the Netencoding module. This interface contains these functions
288 * to keep the compatibility with the old Cgi module.
291 val decode : string -> string
292 val encode : string -> string
294 (* URL-encoded parameters:
296 * The following two functions create and analyze URL-encoded parameters.
297 * Format: name1=val1&name2=val2&...
300 val mk_url_encoded_parameters : (string * string) list -> string
301 (* The argument is a list of (name,value) pairs. The result is the
302 * single URL-encoded parameter string.
305 val dest_url_encoded_parameters : string -> (string * string) list
306 (* The argument is the URL-encoded parameter string. The result is
307 * the corresponding list of (name,value) pairs.
308 * Note: Whitespace within the parameter string is ignored.
309 * If there is a format error, the function fails.
312 (* Form-encoded parameters:
314 * According to: RFCs 2388, 2183, 2045, 2046
316 * General note: This is a simple API to encode/decode form-encoded parameters.
317 * Especially, it is not possible to pass the header of the parts through
321 val mk_form_encoded_parameters : (string * string * string) list ->
323 (* The argument is a list of (name,mimetype,value) triples.
324 * The result is (parstr, boundary), where 'parstr' is the
325 * single form-encoded parameter string, and 'boundary' is the
326 * boundary to separate the message parts.
328 * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
331 val dest_form_encoded_parameters : string -> boundary:string -> config ->
333 (* The first argument is the form-encoded parameter string.
334 * The second argument is the boundary (extracted from the mime type).
335 * Third argument: Only the workarounds component is used.
337 * the corresponding list of arguments (all in memory).
338 * If there is a format error, the function fails.
339 * Note: embedded multipart/mixed types are returned as they are,
340 * and are not recursively decoded.
341 * Note: The content-transfer-encodings "7bit", "8bit", "binary",
342 * "base64", and "quoted-printable" are supported.
343 * Note: Parameter names which include spaces or non-alphanumeric
344 * characters may be problematic (the rules of RFC 2047 are NOT applied).
345 * Note: The returned MIME type is not normalized.
348 val dest_form_encoded_parameters_from_netstream
349 : Netstream.t -> boundary:string -> config -> argument list
350 (* let arglist = dest_form_encoded_parameters_from_netstream s b c:
351 * Reads the form-encoded parameters from netstream s. The boundary
352 * is passed in b, and the configuration in c.
353 * A list of arguments is returned.
355 * See also dest_form_encoded_parameters.
357 * Restriction: In contrast to dest_form_encoded_parameters, this
358 * function is not able to handle the content-transfer-encodings
359 * "base64" and "quoted-printable". (This is not really a restriction
360 * because no browser uses these encodings in conjunction with HTTP.
361 * This is different if mail transport is chosen. - The reason for
362 * this restriction is that there are currently no stream functions
366 (* Private functions: *)
368 val init_mt : (unit -> unit) -> (unit -> unit) -> unit
(* Private function: not part of the supported API. Takes two callbacks of
 * type unit -> unit.
 * NOTE(review): presumably these are the lock/unlock hooks installed when
 * the library is used from multi-threaded programs -- confirm against the
 * implementation before relying on this.
 *)
371 (**********************************************************************)
372 (* Compatibility with CGI library by J.-C. Filliatre *)
373 (**********************************************************************)
375 (* The following functions are compatible with J.-C. Filliatre's CGI
378 * parse_args, header, this_url, decode, encode.
380 * Note that the new implementation of parse_args can be safely invoked
383 * Since release 0.8, Netstring's CGI implementation is again thread-safe.
387 (* ======================================================================
391 * Revision 1.1 2000/11/17 09:57:27 lpadovan
394 * Revision 1.8 2000/06/25 22:34:43 gerd
395 * Added labels to arguments.
397 * Revision 1.7 2000/06/25 21:40:36 gerd
400 * Revision 1.6 2000/06/25 21:15:48 gerd
401 * Checked thread-safety.
403 * Revision 1.5 2000/05/16 22:28:13 gerd
404 * New "workarounds" config component.
406 * Revision 1.4 2000/04/15 16:47:27 gerd
407 * Last minor changes before releasing 0.6.
409 * Revision 1.3 2000/04/15 13:09:01 gerd
410 * Implemented uploads to temporary files.
412 * Revision 1.2 2000/03/02 01:15:30 gerd
415 * Revision 1.1 2000/02/25 15:21:12 gerd