+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-exception Malformed_URL
-
-type url_syntax_option =
- Url_part_not_recognized
- | Url_part_allowed
- | Url_part_required
-
-
-type url_syntax =
- { url_enable_scheme : url_syntax_option;
- url_enable_user : url_syntax_option;
- url_enable_password : url_syntax_option;
- url_enable_host : url_syntax_option;
- url_enable_port : url_syntax_option;
- url_enable_path : url_syntax_option;
- url_enable_param : url_syntax_option;
- url_enable_query : url_syntax_option;
- url_enable_fragment : url_syntax_option;
- url_enable_other : url_syntax_option;
- url_accepts_8bits : bool;
- url_is_valid : url -> bool;
- }
-
-and url =
- {
- url_syntax : url_syntax;
- mutable url_validity : bool;
- url_scheme : string option;
- url_user : string option;
- url_password : string option;
- url_host : string option;
- url_port : int option;
- url_path : string list;
- url_param : string list;
- url_query : string option;
- url_fragment : string option;
- url_other : string option;
- }
-;;
-
-
-type char_category =
- Accepted
- | Rejected
- | Separator
-
-
-
-let scan_url_part s k_from k_to cats accept_8bits =
- (* Scans the longest word of accepted characters from position 'k_from'
- * in 's' until at most position 'k_to'. The character following the
- * word (if any) must be a separator character.
- * On success, the function returns the position of the last character
- * of the word + 1.
- * If there is any rejected character before the separator or the end
- * of the string (i.e. position 'k_to') is reached, the exception
- * Malformed_URL is raised.
- * Furthermore, if the character '%' is accepted it is checked whether
- * two hexadecimal digits follow (which must be accepted, too). If this
- * is not true, the exception Malformed_URL is raised, too.
- * 'cats': contains for every character code (0 to 255) the category
- * of the character.
- *)
- let check_hex c =
- if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
- match c with
- ('0'..'9'|'A'..'F'|'a'..'f') -> ()
- | _ -> raise Malformed_URL
- in
-
- let rec scan k =
- if k >= k_to then
- k
- else begin
- let c = s.[k] in
- let cat = cats.(Char.code c) in
- match cat with
- Accepted ->
- if c = '%' then begin
- if k+2 >= k_to then raise Malformed_URL;
- let c1 = s.[k+1] in
- let c2 = s.[k+2] in
- check_hex c1;
- check_hex c2;
- scan (k+3)
- end
- else
- scan (k+1)
- | Separator -> k
- | Rejected ->
- if accept_8bits && c >= '\128'
- then scan (k+1)
- else raise Malformed_URL
- end
- in
-
- assert (Array.length cats = 256);
- assert (k_from >= 0);
- assert (k_from <= k_to);
- assert (k_to <= String.length s);
-
- scan k_from
-;;
-
-
-(* Create a categorization: *)
-
-let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
- 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]
-
-let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
- 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]
-
-let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]
-
-let safe = [ '$'; '-'; '_'; '.'; '+' ]
-
-let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]
-
-let make_cats accepted separators =
- (* create a categorization:
- * - All characters listed in 'separators' are separators.
- * - All characters listed in 'accepted' and which do not occur in
- * 'separators' are accepted characters.
- * - All other characters are rejected.
- *)
- let cats = Array.make 256 Rejected in
- List.iter
- (fun c ->
- cats.(Char.code c) <- Accepted
- )
- accepted;
-
- List.iter
- (fun c ->
- cats.(Char.code c) <- Separator
- )
- separators;
- cats
-;;
-
-
-let scheme_cats =
- make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;
-
- (* scheme_cats: character categorization to _extract_ the URL scheme *)
-
-
-let login_cats =
- make_cats
- (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])
- [':'; '@'; '/'; '#' ]
-;;
-
- (* login_cats: character categorization to _extract_ user name, password,
- * host name, and port.
- *)
-
-let host_cats =
- make_cats
- (lalpha @ ualpha @ digit @ ['.'; '-'])
- []
-;;
-
- (* host_cats: character categorization to _check_ whether the host name
- * is formed only by legal characters.
- * Especially '%' is not allowed here!
- *)
-
-let port_cats =
- make_cats
- digit
- []
-;;
-
- (* port_cats: character categorization to _check_ whether the port number
- * is formed only by legal characters.
- * Especially '%' is not allowed here!
- *)
-
-let path_cats separators =
- make_cats
- (lalpha @ ualpha @ digit @ safe @ extra @
- ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
- separators
-;;
-
-
-let separators_from_syntax syn =
- let include_if syn_option clist =
- if syn_option <> Url_part_not_recognized then
- clist
- else
- []
- in
- (include_if syn.url_enable_param [';']) @
- (include_if syn.url_enable_query ['?']) @
- (include_if syn.url_enable_fragment ['#'])
-;;
-
-
-let path_cats_from_syntax syn extraseps =
- let separators = separators_from_syntax syn in
- path_cats (separators @ extraseps)
-;;
-
-(* path_cats_from_syntax:
- * Computes a character categorization to extract the path from an URL.
- * This depends on the syntax because the list of possible separators
- * contains the characters that may begin the next URL clause.
- *
- * Notes:
- * - The '#' is rejected unless fragments are enabled.
- * - The '~' is accepted although this violates RFC 1738.
- *)
-
-
-let other_cats_from_syntax syn =
- let include_if syn_option clist =
- if syn_option <> Url_part_not_recognized then
- clist
- else
- []
- in
- let separators =
- (include_if syn.url_enable_param [';']) @
- (include_if syn.url_enable_query ['?']) @
- (include_if syn.url_enable_fragment ['#'])
- in
-
- make_cats
- (lalpha @ ualpha @ digit @ safe @ extra @
- (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
- []
-;;
-
- (* other_cats: character categorization to extract or check the
- * "other" part of the URL.
- *)
-
-
-
-let extract_url_scheme s =
- let l = String.length s in
- let k = scan_url_part s 0 l scheme_cats false in
- (* or raise Malformed_URL *)
- if k = l then raise Malformed_URL;
- assert (s.[k] = ':');
- String.lowercase(String.sub s 0 k)
-;;
-
-
-let ( => ) a b = not a or b;; (* implication *)
-
-let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *)
-
-let url_syntax_is_valid syn =
- let recognized x = x <> Url_part_not_recognized in
- let not_recognized x = x = Url_part_not_recognized in
- (recognized syn.url_enable_password => recognized syn.url_enable_user) &
- (recognized syn.url_enable_port => recognized syn.url_enable_host) &
- (recognized syn.url_enable_user => recognized syn.url_enable_host) &
- not ( (recognized syn.url_enable_user ||
- recognized syn.url_enable_password ||
- recognized syn.url_enable_host ||
- recognized syn.url_enable_port ||
- recognized syn.url_enable_path) &&
- (recognized syn.url_enable_other))
-;;
-
-
-let partial_url_syntax syn =
- let weaken =
- function
- Url_part_not_recognized -> Url_part_not_recognized
- | Url_part_allowed -> Url_part_allowed
- | Url_part_required -> Url_part_allowed
- in
- { url_enable_scheme = weaken syn.url_enable_scheme;
- url_enable_user = weaken syn.url_enable_user;
- url_enable_password = weaken syn.url_enable_password;
- url_enable_host = weaken syn.url_enable_host;
- url_enable_port = weaken syn.url_enable_port;
- url_enable_path = weaken syn.url_enable_path;
- url_enable_param = weaken syn.url_enable_param;
- url_enable_query = weaken syn.url_enable_query;
- url_enable_fragment = weaken syn.url_enable_fragment;
- url_enable_other = weaken syn.url_enable_other;
- url_accepts_8bits = syn.url_accepts_8bits;
- url_is_valid = syn.url_is_valid;
- }
-;;
-
-
-
-let file_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_allowed;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_required;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let ftp_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_allowed;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_required;
- url_enable_port = Url_part_allowed;
- url_enable_path = Url_part_allowed;
- url_enable_param = Url_part_allowed;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let http_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_allowed;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_required;
- url_enable_port = Url_part_allowed;
- url_enable_path = Url_part_allowed;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_allowed;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let mailto_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_not_recognized;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_not_recognized;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_required;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let null_url_syntax =
- { url_enable_scheme = Url_part_not_recognized;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_not_recognized;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_not_recognized;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let ip_url_syntax =
- { url_enable_scheme = Url_part_allowed;
- url_enable_user = Url_part_allowed;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_allowed;
- url_enable_port = Url_part_allowed;
- url_enable_path = Url_part_allowed;
- url_enable_param = Url_part_allowed;
- url_enable_query = Url_part_allowed;
- url_enable_fragment = Url_part_allowed;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let common_url_syntax =
- let h = Hashtbl.create 10 in
- Hashtbl.add h "file" file_url_syntax;
- Hashtbl.add h "ftp" ftp_url_syntax;
- Hashtbl.add h "http" http_url_syntax;
- Hashtbl.add h "mailto" mailto_url_syntax;
- h
-;;
-
-
-let url_conforms_to_syntax url =
- let recognized x = x <> Url_part_not_recognized in
- let required x = x = Url_part_required in
- let present x = x <> None in
- let syn = url.url_syntax in
- (present url.url_scheme => recognized syn.url_enable_scheme) &
- (present url.url_user => recognized syn.url_enable_user) &
- (present url.url_password => recognized syn.url_enable_password) &
- (present url.url_host => recognized syn.url_enable_host) &
- (present url.url_port => recognized syn.url_enable_port) &
- ((url.url_path <> []) => recognized syn.url_enable_path) &
- ((url.url_param <> []) => recognized syn.url_enable_param) &
- (present url.url_query => recognized syn.url_enable_query) &
- (present url.url_fragment => recognized syn.url_enable_fragment) &
- (present url.url_other => recognized syn.url_enable_other) &
- (required syn.url_enable_scheme => present url.url_scheme) &
- (required syn.url_enable_user => present url.url_user) &
- (required syn.url_enable_password => present url.url_password) &
- (required syn.url_enable_host => present url.url_host) &
- (required syn.url_enable_port => present url.url_port) &
- (required syn.url_enable_path => (url.url_path <> [])) &
- (required syn.url_enable_param => (url.url_param <> [])) &
- (required syn.url_enable_query => present url.url_query) &
- (required syn.url_enable_fragment => present url.url_fragment) &
- (required syn.url_enable_other => present url.url_other) &
- (url.url_validity or syn.url_is_valid url)
-;;
-
-
-let url_syntax_of_url url = url.url_syntax
-;;
-
-
-let modify_url
- ?syntax
- ?(encoded = false)
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?path
- ?param
- ?query
- ?fragment
- ?other
- url
- =
-
- let encode = Netencoding.Url.encode in
- let enc x =
- if encoded then
- x
- else
- match x with
- None -> None
- | Some x' -> Some (encode x')
- in
- let enc_list l =
- if encoded then
- l
- else
- List.map encode l
- in
-
- let new_syntax =
- match syntax with
- None -> url.url_syntax
- | Some syn -> syn
- in
-
- let check_string s_opt cats =
- match s_opt with
- None -> ()
- | Some s ->
- let l = String.length s in
- let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
- (* or raise Malformed_URL *)
- if k <> l then raise Malformed_URL
- in
-
- let check_string_list p cats sep =
- List.iter
- (fun p_component ->
- let l = String.length p_component in
- let k =
- scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
- (* or raise Malformed_URL *)
- if k <> l then raise Malformed_URL;
- if String.contains p_component sep then raise Malformed_URL;
- )
- p
- in
-
- (* Create the modified record: *)
- let url' =
- {
- url_syntax = new_syntax;
- url_validity = false;
- url_scheme = if scheme = None then url.url_scheme else scheme;
- url_user = if user = None then url.url_user else enc user;
- url_password = if password = None then url.url_password else enc password;
- url_host = if host = None then url.url_host else host;
- url_port = if port = None then url.url_port else port;
- url_path = (match path with
- None -> url.url_path
- | Some p -> enc_list p);
- url_param = (match param with
- None -> url.url_param
- | Some p -> enc_list p);
- url_query = if query = None then url.url_query else enc query;
- url_fragment = if fragment = None then url.url_fragment else enc fragment;
- url_other = if other = None then url.url_other else enc other;
- }
- in
- (* Check whether the URL conforms to the syntax:
- *)
- if not (url_conforms_to_syntax url') then raise Malformed_URL;
- if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
- if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
- if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
- (* Check every part: *)
- check_string url'.url_scheme scheme_cats;
- check_string url'.url_user login_cats;
- check_string url'.url_password login_cats;
- check_string url'.url_host host_cats;
- (match url'.url_port with
- None -> ()
- | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
- );
- let path_cats = path_cats_from_syntax new_syntax [] in
- let other_cats = other_cats_from_syntax new_syntax in
- check_string url'.url_query path_cats;
- check_string url'.url_fragment path_cats;
- check_string url'.url_other other_cats;
- (* Check the lists: *)
- check_string_list url'.url_param path_cats ';';
- check_string_list url'.url_path path_cats '/';
- (* Further path checks: *)
- begin match url'.url_path with
- [] ->
- (* The path is empty: There must not be a 'param' or 'query' *)
- if url'.url_host <> None then begin
- if url'.url_param <> [] then raise Malformed_URL;
- if url'.url_query <> None then raise Malformed_URL;
- end
- | ["";""] ->
- (* This is illegal. *)
- raise Malformed_URL;
- | "" :: p' ->
- (* The path is absolute: always ok *)
- ()
- | _ ->
- (* The path is relative: there must not be a host *)
- if url'.url_host <> None then raise Malformed_URL;
- end;
- begin match url'.url_path with
- _ :: rest -> (* "//" ambiguity *)
- begin match List.rev rest with
- _ :: rest' ->
- if List.exists (fun p -> p = "") rest' then
- raise Malformed_URL;
- | [] ->
- ()
- end
- | [] ->
- ()
- end;
- (* Cache that the URL is valid: *)
- url'.url_validity <- true;
-
- url'
-;;
-
-
-let null_url =
- {
- url_syntax = null_url_syntax;
- url_validity = true;
- url_scheme = None;
- url_user = None;
- url_password = None;
- url_host = None;
- url_port = None;
- url_path = [];
- url_param = [];
- url_query = None;
- url_fragment = None;
- url_other = None;
- }
-;;
-
-
-let make_url
- ?(encoded = false)
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?path
- ?param
- ?query
- ?fragment
- ?other
- url_syntax
- =
-
- if not (url_syntax_is_valid url_syntax) then
- invalid_arg "Neturl.make_url";
-
- modify_url
- ~encoded:encoded
- ~syntax:url_syntax
- ?scheme:scheme
- ?user:user
- ?password:password
- ?host:host
- ?port:port
- ?path:path
- ?param:param
- ?query:query
- ?fragment:fragment
- ?other:other
- null_url
-;;
-
-
-let remove_from_url
- ?(scheme = false)
- ?(user = false)
- ?(password = false)
- ?(host = false)
- ?(port = false)
- ?(path = false)
- ?(param = false)
- ?(query = false)
- ?(fragment = false)
- ?(other = false)
- url
- =
-
- make_url
- ~encoded: true
- ?scheme: (if scheme then None else url.url_scheme)
- ?user: (if user then None else url.url_user)
- ?password: (if password then None else url.url_password)
- ?host: (if host then None else url.url_host)
- ?port: (if port then None else url.url_port)
- ?path: (if path then None else Some url.url_path)
- ?param: (if param then None else Some url.url_param)
- ?query: (if query then None else url.url_query)
- ?fragment: (if fragment then None else url.url_fragment)
- ?other: (if other then None else url.url_other)
- url.url_syntax
-;;
-
-
-let default_url
- ?(encoded = false)
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?(path = [])
- ?(param = [])
- ?query
- ?fragment
- ?other
- url
- =
-
- let encode = Netencoding.Url.encode in
-
- let enc x =
- if encoded then
- x
- else
- match x with
- None -> None
- | Some x' -> Some (encode x')
- in
-
- let enc_list l =
- if encoded then
- l
- else
- List.map encode l
- in
-
- let pass_if_missing current arg =
- match current with
- None -> arg
- | _ -> current
- in
-
- make_url
- ~encoded: true
- ?scheme: (pass_if_missing url.url_scheme scheme)
- ?user: (pass_if_missing url.url_user (enc user))
- ?password: (pass_if_missing url.url_password (enc password))
- ?host: (pass_if_missing url.url_host host)
- ?port: (pass_if_missing url.url_port port)
- ~path: (if url.url_path = [] then enc_list path else url.url_path)
- ~param: (if url.url_param = [] then enc_list param else url.url_param)
- ?query: (pass_if_missing url.url_query (enc query))
- ?fragment: (pass_if_missing url.url_fragment (enc fragment))
- ?other: (pass_if_missing url.url_other (enc other))
- url.url_syntax
-;;
-
-
-let undefault_url
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?path
- ?param
- ?query
- ?fragment
- ?other
- url
- =
-
- let remove_if_matching current arg =
- match current with
- None -> None
- | Some x ->
- (match arg with
- None -> current
- | Some x' ->
- if x=x' then
- None
- else
- current)
- in
-
- make_url
- ~encoded: true
- ?scheme: (remove_if_matching url.url_scheme scheme)
- ?user: (remove_if_matching url.url_user user)
- ?password: (remove_if_matching url.url_password password)
- ?host: (remove_if_matching url.url_host host)
- ?port: (remove_if_matching url.url_port port)
- ~path: (match path with
- None -> url.url_path
- | Some x ->
- if x = url.url_path then
- []
- else
- url.url_path)
- ~param: (match param with
- None -> url.url_param
- | Some x ->
- if x = url.url_param then
- []
- else
- url.url_param)
- ?query: (remove_if_matching url.url_query query)
- ?fragment: (remove_if_matching url.url_fragment fragment)
- ?other: (remove_if_matching url.url_other other)
- url.url_syntax
-;;
-
-
-let url_provides
- ?(scheme = false)
- ?(user = false)
- ?(password = false)
- ?(host = false)
- ?(port = false)
- ?(path = false)
- ?(param = false)
- ?(query = false)
- ?(fragment = false)
- ?(other = false)
- url
- =
-
- (scheme => (url.url_scheme <> None)) &
- (user => (url.url_user <> None)) &
- (password => (url.url_password <> None)) &
- (host => (url.url_host <> None)) &
- (port => (url.url_port <> None)) &
- (path => (url.url_path <> [])) &
- (param => (url.url_param <> [])) &
- (query => (url.url_query <> None)) &
- (fragment => (url.url_fragment <> None)) &
- (other => (url.url_other <> None))
-;;
-
-
-let return_if value =
- match value with
- None -> raise Not_found
- | Some x -> x
-;;
-
-
-let decode_if want_encoded value =
- let value' = return_if value in
- if want_encoded then
- value'
- else
- Netencoding.Url.decode value' (* WARNING: not thread-safe! *)
-;;
-
-
-let decode_path_if want_encoded value =
- if want_encoded then
- value
- else
- List.map Netencoding.Url.decode value (* WARNING: not thread-safe! *)
-;;
-
-
-let url_scheme url = return_if url.url_scheme;;
-let url_user ?(encoded=false) url = decode_if encoded url.url_user;;
-let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
-let url_host url = return_if url.url_host;;
-let url_port url = return_if url.url_port;;
-let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;;
-let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;;
-let url_query ?(encoded=false) url = decode_if encoded url.url_query;;
-let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
-let url_other ?(encoded=false) url = decode_if encoded url.url_other;;
-
-
-let string_of_url url =
- if not (url.url_validity) then
- failwith "Neturl.string_of_url: URL not flagged as valid";
- (match url.url_scheme with
- None -> ""
- | Some s -> s ^ ":") ^
- (match url.url_host with
- None -> ""
- | Some host ->
- "//" ^
- (match url.url_user with
- None -> ""
- | Some user ->
- user ^
- (match url.url_password with
- None -> ""
- | Some password ->
- ":" ^ password
- ) ^
- "@") ^
- host ^
- (match url.url_port with
- None -> ""
- | Some port ->
- ":" ^ string_of_int port)) ^
- (match url.url_path with
- | [""] ->
- "/"
- | x :: p when url.url_scheme = None &&
- url.url_host = None &&
- String.contains x ':'
- ->
- (* Really a special case: The colon contained in 'x' may cause
- * that a prefix of 'x' is interpreted as URL scheme. In this
- * case, "./" is prepended (as recommended in RFC 1808, 5.3).
- *)
- "./"
- | _ ->
- ""
- ) ^
- String.concat "/" url.url_path ^
- (match url.url_other with
- None -> ""
- | Some other ->
- other) ^
- String.concat "" (List.map (fun s -> ";" ^ s) url.url_param) ^
- (match url.url_query with
- None -> ""
- | Some query ->
- "?" ^ query) ^
- (match url.url_fragment with
- None -> ""
- | Some fragment ->
- "#" ^ fragment)
-;;
-
-
-let url_of_string url_syntax s =
- let l = String.length s in
- let recognized x = x <> Url_part_not_recognized in
-
- let rec collect_words terminators eof_char cats k =
- (* Collect words as recognized by 'cats', starting at position 'k' in
- * 's'. Collection stops if one the characters listed in 'terminators'
- * is found. If the end of the string is reached, it is treated as
- * 'eof_char'.
- *)
- let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in
- (* or raise Malformed_URL *)
- let word, sep =
- String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
- if List.mem sep terminators then
- [word, sep], k'
- else
- let word_sep_list', k'' =
- collect_words terminators eof_char cats (k'+1) in
- ((word, sep) :: word_sep_list'), k''
- in
-
- (* Try to extract the scheme name: *)
- let scheme, k1 =
- if recognized url_syntax.url_enable_scheme then
- try
- let k = scan_url_part s 0 l scheme_cats false in
- (* or raise Malformed_URL *)
- if k = l then raise Malformed_URL;
- assert (s.[k] = ':');
- Some (String.sub s 0 k), (k+1)
- with
- Malformed_URL -> None, 0
- else
- None, 0
- in
-
- (* If there is a "//", a host will follow: *)
- let host, port, user, password, k2 =
- if recognized url_syntax.url_enable_host &&
- k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin
-
- let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2)
- in
- (* or raise Malformed_URL *)
-
- let int x =
- try int_of_string x with _ -> raise Malformed_URL in
-
- match word_sep_list with
- [ host, ('/'|'#') ] ->
- Some host, None, None, None, k'
- | [ host, ':'; port, ('/'|'#') ] ->
- Some host, Some (int port), None, None, k'
- | [ user, '@'; host, ('/'|'#') ] ->
- Some host, None, Some user, None, k'
- | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
- Some host, Some (int port), Some user, None, k'
- | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
- Some host, None, Some user, Some password, k'
- | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
- Some host, Some (int port), Some user, Some password, k'
- | _ ->
- raise Malformed_URL
- end
- else
- None, None, None, None, k1
- in
-
- let path, k3 =
- if recognized url_syntax.url_enable_path &&
- k2 < l (* && s.[k2]='/' *)
- then begin
- let cats = path_cats_from_syntax url_syntax [ '/' ] in
- let seps = separators_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
- (* or raise Malformed_URL *)
- match word_sep_list with
- [ "", '/'; "", _ ] ->
- [ "" ], k'
- | [ "", _ ] ->
- [], k'
- | _ ->
- List.map fst word_sep_list, k'
- end
- else begin
- (* If there is a single '/': skip it *)
- if not (recognized url_syntax.url_enable_other) &&
- k2 < l && s.[k2]='/'
- then
- [], (k2+1)
- else
- [], k2
- end
- in
-
- let other, k4 =
- if recognized url_syntax.url_enable_other &&
- k3 < l
- then begin
-
- let cats = other_cats_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
- (* or raise Malformed_URL *)
-
- match word_sep_list with
- [ other, _ ] -> Some other, k'
- | _ -> assert false
- end
- else
- None, k3
- in
-
- let param, k5 =
- if recognized url_syntax.url_enable_param &&
- k4 < l && s.[k4]=';'
- then begin
- let cats = path_cats_from_syntax url_syntax [] in
- let seps = separators_from_syntax url_syntax in
- let seps' = List.filter (fun c -> c <> ';') seps in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
- (* or raise Malformed_URL *)
-
- List.map fst word_sep_list, k'
- end
- else
- [], k4
- in
-
- let query, k6 =
- if recognized url_syntax.url_enable_query &&
- k5 < l && s.[k5]='?'
- then begin
- let cats = path_cats_from_syntax url_syntax [] in
- let seps = separators_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
- (* or raise Malformed_URL *)
-
- match word_sep_list with
- [ query, _ ] -> Some query, k'
- | _ -> assert false
- end
- else
- None, k5
- in
-
- let fragment, k7 =
- if recognized url_syntax.url_enable_fragment &&
- k6 < l && s.[k6]='#'
- then begin
- let cats = path_cats_from_syntax url_syntax [] in
- let seps = separators_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
- (* or raise Malformed_URL *)
-
- match word_sep_list with
- [ fragment, _ ] -> Some fragment, k'
- | _ -> assert false
- end
- else
- None, k6
- in
-
- if k7 <> l then raise Malformed_URL;
-
- make_url
- ~encoded:true
- ?scheme:scheme
- ?user:user
- ?password:password
- ?host:host
- ?port:port
- ~path:path
- ~param:param
- ?query:query
- ?fragment:fragment
- ?other:other
- url_syntax
-;;
-
-
-let split_path s =
- let l = String.length s in
- let rec collect_words k =
- let k' =
- try
- String.index_from s k '/'
- with
- Not_found -> l
- in
- let word = String.sub s k (k'-k) in
- if k' >= l then
- [word]
- else
- word :: collect_words (k'+1)
- in
- match collect_words 0 with
- [ "" ] -> []
- | [ "";"" ] -> [ "" ]
- | other -> other
-;;
-
-
-let join_path l =
- match l with
- [ "" ] -> "/"
- | _ -> String.concat "/" l;;
-
-
-let norm_path l =
-
- let rec remove_slash_slash l first =
- match l with
- | [ "" ] ->
- [ "" ]
- | [ ""; "" ] when first ->
- [ "" ]
- | "" :: l' when not first ->
- remove_slash_slash l' false
- | x :: l' ->
- x :: remove_slash_slash l' false
- | [] ->
- []
- in
-
- let rec remove_dot l first =
- match l with
- | ([ "." ] | ["."; ""]) ->
- if first then [] else [ "" ]
- | "." :: x :: l' ->
- remove_dot (x :: l') false
- | x :: l' ->
- x :: remove_dot l' false
- | [] ->
- []
- in
-
- let rec remove_dot_dot_once l first =
- match l with
- x :: ".." :: [] when x <> "" && x <> ".." && not first ->
- [ "" ]
- | x :: ".." :: l' when x <> "" && x <> ".." ->
- l'
- | x :: l' ->
- x :: remove_dot_dot_once l' false
- | [] ->
- raise Not_found
- in
-
- let rec remove_dot_dot l =
- try
- let l' = remove_dot_dot_once l true in
- remove_dot_dot l'
- with
- Not_found -> l
- in
-
- let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
- match l' with
- [".."] -> [".."; ""]
- | ["";""] -> [ "" ]
- | _ -> l'
-;;
-
-
-let apply_relative_url baseurl relurl =
- if not (baseurl.url_validity) or not (relurl.url_validity) then
- failwith "Neturl.apply_relative_url: URL not flagged as valid";
-
- if relurl.url_scheme <> None then
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax *)
- relurl
- else
- if relurl.url_host <> None then
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax and scheme *)
- ?scheme:baseurl.url_scheme
- relurl
- else
- match relurl.url_path with
- "" :: other ->
- (* An absolute path *)
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
- ~encoded:true
- ?scheme:baseurl.url_scheme (* login info *)
- ?host:baseurl.url_host
- ?port:baseurl.url_port
- ?user:baseurl.url_user
- ?password:baseurl.url_password
- relurl
- | [] ->
- (* Empty: Inherit also path, params, query, and fragment *)
- let new_params, new_query, new_fragment =
- match relurl.url_param, relurl.url_query, relurl.url_fragment
- with
- [], None, None ->
- (* Inherit all three *)
- baseurl.url_param, baseurl.url_query, baseurl.url_fragment
- | [], None, f ->
- (* Inherit params and query *)
- baseurl.url_param, baseurl.url_query, f
- | [], q, f ->
- (* Inherit params *)
- baseurl.url_param, q, f
- | p, q, f ->
- (* Inherit none of them *)
- p, q, f
- in
- modify_url
- ~syntax:baseurl.url_syntax
- ~encoded:true
- ?scheme:baseurl.url_scheme
- ?host:baseurl.url_host
- ?port:baseurl.url_port
- ?user:baseurl.url_user
- ?password:baseurl.url_password
- ~path:baseurl.url_path
- ~param:new_params
- ?query:new_query
- ?fragment:new_fragment
- relurl
- | relpath ->
- (* A relative path *)
- let rec change_path basepath =
- match basepath with
- | [] ->
- relpath
- | [ x ] ->
- relpath
- | x :: basepath' ->
- x :: change_path basepath'
- in
- let new_path = norm_path (change_path baseurl.url_path) in
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
- ~encoded:true
- ?scheme:baseurl.url_scheme (* login info *)
- ?host:baseurl.url_host
- ?port:baseurl.url_port
- ?user:baseurl.url_user
- ?password:baseurl.url_password
- ~path:new_path (* and change path *)
- relurl
-
-;;
-
-
-let print_url url =
- Format.print_string ("<URL:" ^ string_of_url url ^ ">")
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/07/04 21:50:51 gerd
- * Fixed typo.
- *
- * Revision 1.3 2000/06/26 22:57:49 gerd
- * Change: The record 'url_syntax' has an additional component
- * 'url_accepts_8bits'. Setting this option to 'true' causes that
- * the bytes >= 0x80 are no longer rejected.
- *
- * Revision 1.2 2000/06/25 19:39:48 gerd
- * Lots of Bugfixes.
- *
- * Revision 1.1 2000/06/24 20:19:59 gerd
- * Initial revision.
- *
- *
- *)