+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* This module already uses O'Caml-3 features. *)
-
-(* Uniform Resource Locators (URLs):
- *
- * This module provides functions to parse URLs, to print URLs, to
- * store URLs, to modify URLs, and to apply relative URLs.
- *
- * URLs are strings formed according to pattern (1) or (2):
- *
- * (1) scheme://user:password@host:port/path;params?query#fragment
- * (2) scheme:other;params?query#fragment
- *
- * The word at the beginning of the URL identifies the URL scheme
- * (such as "http" or "file"). Depending on the scheme, not all of the
- * parts are allowed, or parts may be omitted. This module defines the
- * type 'url_syntax' whose values describe which parts are allowed/required/
- * not allowed for a concrete URL scheme (see below).
- *
- * Not all characters are allowed in a URL. Some characters are allowed,
- * but have the special task to separate the various parts of the URL
- * (reserved characters).
- * However, it is possible to include even invalid or reserved characters
- * as normal content by applying the '%'-encoding on these characters:
- * A '%' indicates that an encoded character follows, and the character
- * is denoted by a two-digit hexadecimal number (e.g. %2f for '/').
- * In the following descriptions, the term "encoded string" means a string
- * containing such %-encoded characters, and the "decoded string" means a
- * string not containing such characters.
- * See the module Netencoding.Url for functions encoding or decoding
- * strings.
- *
- * The type 'url' describes values storing the components of a URL,
- * and the 'url_syntax' for the URL. In general, the components are
- * stored as encoded strings; however, the '%'-encoding is not
- * applicable to all components.
- * For convenience, the functions creating, modifying, and accessing
- * URLs can handle both encoded and decoded strings. In order to
- * avoid errors, the functions work with decoded strings by default.
- *
- * Note that there is currently no function to compare URLs. The
- * canonical comparison ( = ) is not applicable because the same URL
- * may be written differently.
- *
- * Note that nothing is said about the character set/encoding of URLs.
- * Some protocols and standards prefer UTF-8 as fundamental encoding
- * and apply the '%'-encoding on top of it; i.e. the byte sequence
- * representing a character in UTF-8 is '%'-encoded. There is no special
- * support for this technique.
- *
- * For more information about URLs, see RFCs 1738 and 1808.
- *)
-
-exception Malformed_URL
-(* Is raised by a number of functions when encountering a badly formed
- * URL.
- *)
-
-val extract_url_scheme : string -> string
- (* Returns the URL scheme from the string representation of a URL.
- * E.g. extract_url_scheme "http://host/path" = "http".
- * The scheme name is always converted to lowercase characters.
- * Raises Malformed_URL if the scheme name is not found.
- *)
-
-type url_syntax_option =
- Url_part_not_recognized
- | Url_part_allowed
- | Url_part_required
-
-
-type url_syntax =
- { url_enable_scheme : url_syntax_option;
- url_enable_user : url_syntax_option;
- url_enable_password : url_syntax_option;
- url_enable_host : url_syntax_option;
- url_enable_port : url_syntax_option;
- url_enable_path : url_syntax_option;
- url_enable_param : url_syntax_option;
- url_enable_query : url_syntax_option;
- url_enable_fragment : url_syntax_option;
- url_enable_other : url_syntax_option;
- url_accepts_8bits : bool;
- url_is_valid : url -> bool;
- }
-
-and url
-;;
-
-(* Values of type 'url_syntax' describe which components of a URL are
- * recognized, which are allowed (and optional), and which are required.
- * Not all combinations are valid; the predicate expressed by the
- * function 'url_syntax_is_valid' must hold.
- * The function 'url_is_valid' is applied when a fresh URL is created
- * and must return 'true'. This function makes it possible to add an
- * arbitrary validity criterion to 'url_syntax'. (Note that the URL passed to
- * this function is not fully working; you can safely assume that the
- * accessor functions url_scheme etc. can be applied to it.)
- *
- * Switch 'url_accepts_8bits': If 'true', the bytes with code 128 to
- * 255 are treated like alphanumeric characters; if 'false' these bytes
- * are illegal (but it is still possible to include such bytes in their
- * encoded form: %80 to %FF).
- *
- * Values of type 'url' describe concrete URLs. Every URL must have
- * a fundamental 'url_syntax', and it is only possible to create URLs
- * conforming to the syntax. See 'make_url' for further information.
- *)
-
-
-val url_syntax_is_valid : url_syntax -> bool
- (* Checks whether the passed url_syntax is valid. This means:
- *
- * - If passwords are recognized, users (and hosts) must be recognized, too
- * - If ports are recognized, hosts must be recognized, too
- * - If users are recognized, hosts must be recognized, too
- * - Either the syntax recognizes one of the phrases
- * { user, password, host, port, path }, or the syntax recognizes
- * the phrase 'other'.
- *)
-
-
-val partial_url_syntax : url_syntax -> url_syntax
- (* Transforms the syntax into another syntax where all required parts are
- * changed into optional parts.
- *)
-
-
-(* Note that none of the following url_syntaxes allows 8bit bytes. *)
-
-val null_url_syntax : url_syntax
-
-val ip_url_syntax : url_syntax
- (* Maximum syntax for IP based protocols *)
-
-val common_url_syntax : (string, url_syntax) Hashtbl.t
- (* Syntax descriptions for common URL schemes:
- *
- * null_url_syntax: nothing is recognized
- *
- * common_url_syntax: Hashtable mapping from URL scheme names to
- * definitions of syntaxes:
- *
- * "file": scheme, host?, path
- * "ftp": scheme, user?, password?, host, port?, path?, param?
- * "http": scheme, user?, password?, host, port?, path?, query?
- * "mailto": scheme, other
- *
- * Notes:
- * (1) These syntax descriptions can be weakened for partial/relative URLs
- * by changing the required parts to optional parts: See the function
- * 'partial_url_syntax'.
- * (2) None of the descriptions allows fragments. These can be enabled by
- * setting 'url_enable_fragment' to Url_part_allowed. E.g.
- * { (Hashtbl.find common_url_syntax "file") with
- * url_enable_fragment = Url_part_allowed }
- *)
-
-val null_url : url
- (* A URL without any component and 'null_url_syntax'
- *)
-
-val make_url :
- ?encoded:bool ->
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url_syntax ->
- url
- (* Creates a URL from components:
- *
- * - The components "scheme" and "host" are simple strings to which the
- * '%'-encoding is not applicable.
- * - The component "port" is a simple number. Of course, the '%'-encoding
- * is not applicable here either.
- * - The components "user", "password", "query", "fragment", and "other"
- * are strings which may contain '%'-encoded characters. By default,
- * you can pass any string for these components, and problematic characters
- * are automatically encoded. If you set ~encoded:true, the passed
- * strings must already be encoded, but the function checks whether
- * the encoding is correct.
- * Note that for "query" even the characters '?' and '=' are encoded
- * by default, so you need to set ~encoded:true to pass a reasonable
- * query string.
- * - The components "path" and "param" are lists of strings which may
- * contain '%'-encoded characters. Again, the default is to pass
- * decoded strings to the function, and the function encodes them
- * automatically, and by setting ~encoded:true the caller is responsible
- * for encoding the strings.
- * path = [] and param = [] mean that no path and no parameters are
- * specified, respectively.
- * See below for the representation of these components.
- *
- * Except for "path", the strings representing the components do not
- * contain the characters separating the components from each other.
- * The "path" component includes the '/' at the beginning of the path
- * (if present).
- *
- * The created URL must conform to the 'url_syntax', i.e.
- * - The URL must only contain components which are recognized by the
- * syntax
- * - The URL must contain components which are required by the syntax
- * - The URL must fulfill the predicate expressed by the 'url_is_valid'
- * function of the syntax.
- *
- * The path of a URL is represented as a list of '/'-separated path
- * components. i.e.
- * [ s1; s2; ...; sN ] represents the path
- * s1 ^ "/" ^ s2 ^ "/" ^ ... ^ "/" ^ sN
- * As special cases:
- * [] is the non-existing path
- * [ "" ] is "/"
- * [ "";"" ] is illegal
- *
- * Except for s1 and sN, the path components must not be empty strings.
- *
- * To avoid ambiguities, it is illegal to create URLs with both relative
- * paths (s1 <> "") and host components.
- *
- * Parameters of URLs are components beginning with ';'. The list
- * of parameters is represented as list of strings where the strings
- * contain the value following ';'.
- *)
-
-val modify_url :
- ?syntax:url_syntax ->
- ?encoded:bool ->
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url ->
- url
- (* Modifies the passed components and returns the modified URL.
- * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val remove_from_url :
- ?scheme:bool ->
- ?user:bool ->
- ?password:bool ->
- ?host:bool ->
- ?port:bool ->
- ?path:bool ->
- ?param:bool ->
- ?query:bool ->
- ?fragment:bool ->
- ?other:bool ->
- url ->
- url
- (* Removes the 'true' components from the URL, and returns the modified
- * URL.
- * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val default_url :
- ?encoded:bool ->
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url ->
- url
- (* Adds missing components and returns the modified URL.
- * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val undefault_url :
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url ->
- url
- (* Removes components from the URL if they have the passed value, and
- * returns the modified URL.
- * Note: The values must always be passed in _encoded_ form!
- * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val url_syntax_of_url : url -> url_syntax
- (* Returns the 'url_syntax' record of a URL. *)
-
-val url_of_string : url_syntax -> string -> url
- (* Parses the passed string according to the passed url_syntax. *)
-
-val string_of_url : url -> string
- (* Returns the URL as string *)
-
-val url_provides :
- ?scheme:bool ->
- ?user:bool ->
- ?password:bool ->
- ?host:bool ->
- ?port:bool ->
- ?path:bool ->
- ?param:bool ->
- ?query:bool ->
- ?fragment:bool ->
- ?other:bool ->
- url ->
- bool
- (* Returns 'true' iff the URL has all of the components passed with
- * 'true' value.
- *)
-
-val url_scheme : url -> string
-val url_user : ?encoded:bool -> url -> string
-val url_password : ?encoded:bool -> url -> string
-val url_host : url -> string
-val url_port : url -> int
-val url_path : ?encoded:bool -> url -> string list
-val url_param : ?encoded:bool -> url -> string list
-val url_query : ?encoded:bool -> url -> string
-val url_fragment : ?encoded:bool -> url -> string
-val url_other : ?encoded:bool -> url -> string
- (* Return components of the URL. The functions return decoded strings
- * unless ~encoded:true is set.
- * If the component does not exist, the exception Not_found
- * is raised.
- *)
-
-val split_path : string -> string list
- (* Splits a '/'-separated path into components (e.g. to set up the
- * ~path argument of make_url).
- * E.g. split_path "a/b/c" = [ "a"; "b"; "c" ],
- * split_path "/a/b" = [ ""; "a"; "b" ],
- * split_path "a/b/" = [ "a"; "b"; "" ]
- *)
-
-val join_path : string list -> string
- (* Concatenates the path components (reverse function of split_path).
- *)
-
-val norm_path : string list -> string list
- (* Removes "." and ".." from the path if possible. Deletes double slashes.
- *
- * EXAMPLES:
- *
- * norm_path ["."] = []
- * means: "." = ""
- * norm_path ["."; ""] = []
- * means: "./" = ""
- * norm_path ["a"; "."] = ["a"; ""]
- * means: "a/." = "a/"
- * norm_path ["a"; "b"; "."] = ["a"; "b"; ""]
- * means: "a/b/." = "a/b/"
- * norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]
- * means: "a/./b/." = "a/b/"
- * norm_path [".."] = [".."; ""]
- * means: ".." = "../"
- * norm_path [".."; ""] = [".."; ""]
- * means: "../" = "../"
- * norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]
- * means: "a/b/../c" = "a/c"
- * norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]
- * means: "a/b/../c/" = "a/c/"
- * norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]
- * means: "//a//b" = "/a/b"
- * norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]
- * means: "a/b//../c/" = "a/c/"
- * norm_path ["a"; ".."] = []
- * means: "a/.." = ""
- *)
-
-
-val apply_relative_url : url -> url -> url
- (* apply_relative_url base rel:
- * Interprets 'rel' relative to 'base' and returns the new URL. This
- * function implements RFC 1808.
- *)
-
-val print_url : url -> unit
- (* Printer for the toploop. *)
-
-(* ---------------------------------------------------------------------- *)
-
-(* EXAMPLES:
- *
- * let http = Hashtbl.find common_url_syntax "http";;
- * let u = url_of_string http "http://g:pw@host/a/%62/";;
- * string_of_url u;;
- * --> "http://g:pw@host/a/%62/"
- * url_scheme u;;
- * --> "http"
- * url_user u;;
- * --> "g"
- * url_password u;;
- * --> "pw"
- * url_host u;;
- * --> "host"
- * url_path u;;
- * --> [ ""; "a"; "b"; "" ] (* sic! *)
- * url_path ~encoded:true u;;
- * --> [ ""; "a"; "%62"; "" ]
- * let v = make_url
- * ~path:[ ".."; "c" ]
- * ~fragment:"near-the-#-character"
- * { (partial_url_syntax http) with url_enable_fragment = Url_part_allowed };;
- * string_of_url v;;
- * --> "../c#near-the-%23-character"
- * let u' = modify_url ~syntax:(url_syntax_of_url v) u;;
- * (* u does not permit fragments *)
- * let w = apply_relative_url u' v;;
- * string_of_url w;;
- * --> "http://g:pw@host/c#near-the-%23-character"
- *)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/06/26 22:57:49 gerd
- * Change: The record 'url_syntax' has an additional component
- * 'url_accepts_8bits'. Setting this option to 'true' causes that
- * the bytes >= 0x80 are no longer rejected.
- *
- * Revision 1.2 2000/06/25 22:55:47 gerd
- * Doc update.
- *
- * Revision 1.1 2000/06/24 20:19:59 gerd
- * Initial revision.
- *
- *
- *)