--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+exception Malformed_URL
+
+type url_syntax_option =
+ Url_part_not_recognized
+ | Url_part_allowed
+ | Url_part_required
+
+
+type url_syntax =
+ { url_enable_scheme : url_syntax_option;
+ url_enable_user : url_syntax_option;
+ url_enable_password : url_syntax_option;
+ url_enable_host : url_syntax_option;
+ url_enable_port : url_syntax_option;
+ url_enable_path : url_syntax_option;
+ url_enable_param : url_syntax_option;
+ url_enable_query : url_syntax_option;
+ url_enable_fragment : url_syntax_option;
+ url_enable_other : url_syntax_option;
+ url_accepts_8bits : bool;
+ url_is_valid : url -> bool;
+ }
+
+and url =
+ {
+ url_syntax : url_syntax;
+ mutable url_validity : bool;
+ url_scheme : string option;
+ url_user : string option;
+ url_password : string option;
+ url_host : string option;
+ url_port : int option;
+ url_path : string list;
+ url_param : string list;
+ url_query : string option;
+ url_fragment : string option;
+ url_other : string option;
+ }
+;;
+
+
+type char_category =
+ Accepted
+ | Rejected
+ | Separator
+
+
+
+let scan_url_part s k_from k_to cats accept_8bits =
+ (* Scans the longest word of accepted characters from position 'k_from'
+ * in 's' until at most position 'k_to'. The character following the
+ * word (if any) must be a separator character.
+ * On success, the function returns the position of the last character
+ * of the word + 1.
+ * If there is any rejected character before the separator or the end
+ * of the string (i.e. position 'k_to') is reached, the exception
+ * Malformed_URL is raised.
+ * Furthermore, if the character '%' is accepted it is checked whether
+ * two hexadecimal digits follow (which must be accepted, too). If this
+ * is not true, the exception Malformed_URL is raised, too.
+ * 'cats': contains for every character code (0 to 255) the category
+ * of the character.
+ *)
+ let check_hex c =
+ if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
+ match c with
+ ('0'..'9'|'A'..'F'|'a'..'f') -> ()
+ | _ -> raise Malformed_URL
+ in
+
+ let rec scan k =
+ if k >= k_to then
+ k
+ else begin
+ let c = s.[k] in
+ let cat = cats.(Char.code c) in
+ match cat with
+ Accepted ->
+ if c = '%' then begin
+ if k+2 >= k_to then raise Malformed_URL;
+ let c1 = s.[k+1] in
+ let c2 = s.[k+2] in
+ check_hex c1;
+ check_hex c2;
+ scan (k+3)
+ end
+ else
+ scan (k+1)
+ | Separator -> k
+ | Rejected ->
+ if accept_8bits && c >= '\128'
+ then scan (k+1)
+ else raise Malformed_URL
+ end
+ in
+
+ assert (Array.length cats = 256);
+ assert (k_from >= 0);
+ assert (k_from <= k_to);
+ assert (k_to <= String.length s);
+
+ scan k_from
+;;
+
+
+(* Create a categorization: *)
+
+let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
+ 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]
+
+let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
+ 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]
+
+let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]
+
+let safe = [ '$'; '-'; '_'; '.'; '+' ]
+
+let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]
+
+let make_cats accepted separators =
+ (* create a categorization:
+ * - All characters listed in 'separators' are separators.
+ * - All characters listed in 'accepted' and which do not occur in
+ * 'separators' are accepted characters.
+ * - All other characters are rejected.
+ *)
+ let cats = Array.make 256 Rejected in
+ List.iter
+ (fun c ->
+ cats.(Char.code c) <- Accepted
+ )
+ accepted;
+
+ List.iter
+ (fun c ->
+ cats.(Char.code c) <- Separator
+ )
+ separators;
+ cats
+;;
+
+
+let scheme_cats =
+ make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;
+
+ (* scheme_cats: character categorization to _extract_ the URL scheme *)
+
+
+let login_cats =
+ make_cats
+ (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])
+ [':'; '@'; '/'; '#' ]
+;;
+
+ (* login_cats: character categorization to _extract_ user name, password,
+ * host name, and port.
+ *)
+
+let host_cats =
+ make_cats
+ (lalpha @ ualpha @ digit @ ['.'; '-'])
+ []
+;;
+
+ (* host_cats: character categorization to _check_ whether the host name
+ * is formed only by legal characters.
+ * Especially '%' is not allowed here!
+ *)
+
+let port_cats =
+ make_cats
+ digit
+ []
+;;
+
+ (* port_cats: character categorization to _check_ whether the port number
+ * is formed only by legal characters.
+ * Especially '%' is not allowed here!
+ *)
+
+let path_cats separators =
+ make_cats
+ (lalpha @ ualpha @ digit @ safe @ extra @
+ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
+ separators
+;;
+
+
+let separators_from_syntax syn =
+ let include_if syn_option clist =
+ if syn_option <> Url_part_not_recognized then
+ clist
+ else
+ []
+ in
+ (include_if syn.url_enable_param [';']) @
+ (include_if syn.url_enable_query ['?']) @
+ (include_if syn.url_enable_fragment ['#'])
+;;
+
+
+let path_cats_from_syntax syn extraseps =
+ let separators = separators_from_syntax syn in
+ path_cats (separators @ extraseps)
+;;
+
+(* path_cats_from_syntax:
+ * Computes a character categorization to extract the path from an URL.
+ * This depends on the syntax because the list of possible separators
+ * contains the characters that may begin the next URL clause.
+ *
+ * Notes:
+ * - The '#' is rejected unless fragments are enabled.
+ * - The '~' is accepted although this violates RFC 1738.
+ *)
+
+
+let other_cats_from_syntax syn =
+ let include_if syn_option clist =
+ if syn_option <> Url_part_not_recognized then
+ clist
+ else
+ []
+ in
+ let separators =
+ (include_if syn.url_enable_param [';']) @
+ (include_if syn.url_enable_query ['?']) @
+ (include_if syn.url_enable_fragment ['#'])
+ in
+
+ make_cats
+ (lalpha @ ualpha @ digit @ safe @ extra @
+ (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
+ []
+;;
+
+ (* other_cats: character categorization to extract or check the
+ * "other" part of the URL.
+ *)
+
+
+
+let extract_url_scheme s =
+ let l = String.length s in
+ let k = scan_url_part s 0 l scheme_cats false in
+ (* or raise Malformed_URL *)
+ if k = l then raise Malformed_URL;
+ assert (s.[k] = ':');
+ String.lowercase(String.sub s 0 k)
+;;
+
+
+let ( => ) a b = not a or b;; (* implication *)
+
+let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *)
+
+let url_syntax_is_valid syn =
+ let recognized x = x <> Url_part_not_recognized in
+ let not_recognized x = x = Url_part_not_recognized in
+ (recognized syn.url_enable_password => recognized syn.url_enable_user) &
+ (recognized syn.url_enable_port => recognized syn.url_enable_host) &
+ (recognized syn.url_enable_user => recognized syn.url_enable_host) &
+ not ( (recognized syn.url_enable_user ||
+ recognized syn.url_enable_password ||
+ recognized syn.url_enable_host ||
+ recognized syn.url_enable_port ||
+ recognized syn.url_enable_path) &&
+ (recognized syn.url_enable_other))
+;;
+
+
+let partial_url_syntax syn =
+ let weaken =
+ function
+ Url_part_not_recognized -> Url_part_not_recognized
+ | Url_part_allowed -> Url_part_allowed
+ | Url_part_required -> Url_part_allowed
+ in
+ { url_enable_scheme = weaken syn.url_enable_scheme;
+ url_enable_user = weaken syn.url_enable_user;
+ url_enable_password = weaken syn.url_enable_password;
+ url_enable_host = weaken syn.url_enable_host;
+ url_enable_port = weaken syn.url_enable_port;
+ url_enable_path = weaken syn.url_enable_path;
+ url_enable_param = weaken syn.url_enable_param;
+ url_enable_query = weaken syn.url_enable_query;
+ url_enable_fragment = weaken syn.url_enable_fragment;
+ url_enable_other = weaken syn.url_enable_other;
+ url_accepts_8bits = syn.url_accepts_8bits;
+ url_is_valid = syn.url_is_valid;
+ }
+;;
+
+
+
+let file_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_not_recognized;
+ url_enable_password = Url_part_not_recognized;
+ url_enable_host = Url_part_allowed;
+ url_enable_port = Url_part_not_recognized;
+ url_enable_path = Url_part_required;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let ftp_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_allowed;
+ url_enable_password = Url_part_allowed;
+ url_enable_host = Url_part_required;
+ url_enable_port = Url_part_allowed;
+ url_enable_path = Url_part_allowed;
+ url_enable_param = Url_part_allowed;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let http_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_allowed;
+ url_enable_password = Url_part_allowed;
+ url_enable_host = Url_part_required;
+ url_enable_port = Url_part_allowed;
+ url_enable_path = Url_part_allowed;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_allowed;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let mailto_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_not_recognized;
+ url_enable_password = Url_part_not_recognized;
+ url_enable_host = Url_part_not_recognized;
+ url_enable_port = Url_part_not_recognized;
+ url_enable_path = Url_part_not_recognized;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_required;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let null_url_syntax =
+ { url_enable_scheme = Url_part_not_recognized;
+ url_enable_user = Url_part_not_recognized;
+ url_enable_password = Url_part_not_recognized;
+ url_enable_host = Url_part_not_recognized;
+ url_enable_port = Url_part_not_recognized;
+ url_enable_path = Url_part_not_recognized;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let ip_url_syntax =
+ { url_enable_scheme = Url_part_allowed;
+ url_enable_user = Url_part_allowed;
+ url_enable_password = Url_part_allowed;
+ url_enable_host = Url_part_allowed;
+ url_enable_port = Url_part_allowed;
+ url_enable_path = Url_part_allowed;
+ url_enable_param = Url_part_allowed;
+ url_enable_query = Url_part_allowed;
+ url_enable_fragment = Url_part_allowed;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let common_url_syntax =
+ let h = Hashtbl.create 10 in
+ Hashtbl.add h "file" file_url_syntax;
+ Hashtbl.add h "ftp" ftp_url_syntax;
+ Hashtbl.add h "http" http_url_syntax;
+ Hashtbl.add h "mailto" mailto_url_syntax;
+ h
+;;
+
+
+let url_conforms_to_syntax url =
+ let recognized x = x <> Url_part_not_recognized in
+ let required x = x = Url_part_required in
+ let present x = x <> None in
+ let syn = url.url_syntax in
+ (present url.url_scheme => recognized syn.url_enable_scheme) &
+ (present url.url_user => recognized syn.url_enable_user) &
+ (present url.url_password => recognized syn.url_enable_password) &
+ (present url.url_host => recognized syn.url_enable_host) &
+ (present url.url_port => recognized syn.url_enable_port) &
+ ((url.url_path <> []) => recognized syn.url_enable_path) &
+ ((url.url_param <> []) => recognized syn.url_enable_param) &
+ (present url.url_query => recognized syn.url_enable_query) &
+ (present url.url_fragment => recognized syn.url_enable_fragment) &
+ (present url.url_other => recognized syn.url_enable_other) &
+ (required syn.url_enable_scheme => present url.url_scheme) &
+ (required syn.url_enable_user => present url.url_user) &
+ (required syn.url_enable_password => present url.url_password) &
+ (required syn.url_enable_host => present url.url_host) &
+ (required syn.url_enable_port => present url.url_port) &
+ (required syn.url_enable_path => (url.url_path <> [])) &
+ (required syn.url_enable_param => (url.url_param <> [])) &
+ (required syn.url_enable_query => present url.url_query) &
+ (required syn.url_enable_fragment => present url.url_fragment) &
+ (required syn.url_enable_other => present url.url_other) &
+ (url.url_validity or syn.url_is_valid url)
+;;
+
+
+let url_syntax_of_url url = url.url_syntax
+;;
+
+
+let modify_url
+ ?syntax
+ ?(encoded = false)
+ ?scheme
+ ?user
+ ?password
+ ?host
+ ?port
+ ?path
+ ?param
+ ?query
+ ?fragment
+ ?other
+ url
+ =
+
+ let encode = Netencoding.Url.encode in
+ let enc x =
+ if encoded then
+ x
+ else
+ match x with
+ None -> None
+ | Some x' -> Some (encode x')
+ in
+ let enc_list l =
+ if encoded then
+ l
+ else
+ List.map encode l
+ in
+
+ let new_syntax =
+ match syntax with
+ None -> url.url_syntax
+ | Some syn -> syn
+ in
+
+ let check_string s_opt cats =
+ match s_opt with
+ None -> ()
+ | Some s ->
+ let l = String.length s in
+ let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ if k <> l then raise Malformed_URL
+ in
+
+ let check_string_list p cats sep =
+ List.iter
+ (fun p_component ->
+ let l = String.length p_component in
+ let k =
+ scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ if k <> l then raise Malformed_URL;
+ if String.contains p_component sep then raise Malformed_URL;
+ )
+ p
+ in
+
+ (* Create the modified record: *)
+ let url' =
+ {
+ url_syntax = new_syntax;
+ url_validity = false;
+ url_scheme = if scheme = None then url.url_scheme else scheme;
+ url_user = if user = None then url.url_user else enc user;
+ url_password = if password = None then url.url_password else enc password;
+ url_host = if host = None then url.url_host else host;
+ url_port = if port = None then url.url_port else port;
+ url_path = (match path with
+ None -> url.url_path
+ | Some p -> enc_list p);
+ url_param = (match param with
+ None -> url.url_param
+ | Some p -> enc_list p);
+ url_query = if query = None then url.url_query else enc query;
+ url_fragment = if fragment = None then url.url_fragment else enc fragment;
+ url_other = if other = None then url.url_other else enc other;
+ }
+ in
+ (* Check whether the URL conforms to the syntax:
+ *)
+ if not (url_conforms_to_syntax url') then raise Malformed_URL;
+ if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
+ if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
+ if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
+ (* Check every part: *)
+ check_string url'.url_scheme scheme_cats;
+ check_string url'.url_user login_cats;
+ check_string url'.url_password login_cats;
+ check_string url'.url_host host_cats;
+ (match url'.url_port with
+ None -> ()
+ | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
+ );
+ let path_cats = path_cats_from_syntax new_syntax [] in
+ let other_cats = other_cats_from_syntax new_syntax in
+ check_string url'.url_query path_cats;
+ check_string url'.url_fragment path_cats;
+ check_string url'.url_other other_cats;
+ (* Check the lists: *)
+ check_string_list url'.url_param path_cats ';';
+ check_string_list url'.url_path path_cats '/';
+ (* Further path checks: *)
+ begin match url'.url_path with
+ [] ->
+ (* The path is empty: There must not be a 'param' or 'query' *)
+ if url'.url_host <> None then begin
+ if url'.url_param <> [] then raise Malformed_URL;
+ if url'.url_query <> None then raise Malformed_URL;
+ end
+ | ["";""] ->
+ (* This is illegal. *)
+ raise Malformed_URL;
+ | "" :: p' ->
+ (* The path is absolute: always ok *)
+ ()
+ | _ ->
+ (* The path is relative: there must not be a host *)
+ if url'.url_host <> None then raise Malformed_URL;
+ end;
+ begin match url'.url_path with
+ _ :: rest -> (* "//" ambiguity *)
+ begin match List.rev rest with
+ _ :: rest' ->
+ if List.exists (fun p -> p = "") rest' then
+ raise Malformed_URL;
+ | [] ->
+ ()
+ end
+ | [] ->
+ ()
+ end;
+ (* Cache that the URL is valid: *)
+ url'.url_validity <- true;
+
+ url'
+;;
+
+
+let null_url =
+ {
+ url_syntax = null_url_syntax;
+ url_validity = true;
+ url_scheme = None;
+ url_user = None;
+ url_password = None;
+ url_host = None;
+ url_port = None;
+ url_path = [];
+ url_param = [];
+ url_query = None;
+ url_fragment = None;
+ url_other = None;
+ }
+;;
+
+
+let make_url
+ ?(encoded = false)
+ ?scheme
+ ?user
+ ?password
+ ?host
+ ?port
+ ?path
+ ?param
+ ?query
+ ?fragment
+ ?other
+ url_syntax
+ =
+
+ if not (url_syntax_is_valid url_syntax) then
+ invalid_arg "Neturl.make_url";
+
+ modify_url
+ ~encoded:encoded
+ ~syntax:url_syntax
+ ?scheme:scheme
+ ?user:user
+ ?password:password
+ ?host:host
+ ?port:port
+ ?path:path
+ ?param:param
+ ?query:query
+ ?fragment:fragment
+ ?other:other
+ null_url
+;;
+
+
+let remove_from_url
+ ?(scheme = false)
+ ?(user = false)
+ ?(password = false)
+ ?(host = false)
+ ?(port = false)
+ ?(path = false)
+ ?(param = false)
+ ?(query = false)
+ ?(fragment = false)
+ ?(other = false)
+ url
+ =
+
+ make_url
+ ~encoded: true
+ ?scheme: (if scheme then None else url.url_scheme)
+ ?user: (if user then None else url.url_user)
+ ?password: (if password then None else url.url_password)
+ ?host: (if host then None else url.url_host)
+ ?port: (if port then None else url.url_port)
+ ?path: (if path then None else Some url.url_path)
+ ?param: (if param then None else Some url.url_param)
+ ?query: (if query then None else url.url_query)
+ ?fragment: (if fragment then None else url.url_fragment)
+ ?other: (if other then None else url.url_other)
+ url.url_syntax
+;;
+
+
+let default_url
+ ?(encoded = false)
+ ?scheme
+ ?user
+ ?password
+ ?host
+ ?port
+ ?(path = [])
+ ?(param = [])
+ ?query
+ ?fragment
+ ?other
+ url
+ =
+
+ let encode = Netencoding.Url.encode in
+
+ let enc x =
+ if encoded then
+ x
+ else
+ match x with
+ None -> None
+ | Some x' -> Some (encode x')
+ in
+
+ let enc_list l =
+ if encoded then
+ l
+ else
+ List.map encode l
+ in
+
+ let pass_if_missing current arg =
+ match current with
+ None -> arg
+ | _ -> current
+ in
+
+ make_url
+ ~encoded: true
+ ?scheme: (pass_if_missing url.url_scheme scheme)
+ ?user: (pass_if_missing url.url_user (enc user))
+ ?password: (pass_if_missing url.url_password (enc password))
+ ?host: (pass_if_missing url.url_host host)
+ ?port: (pass_if_missing url.url_port port)
+ ~path: (if url.url_path = [] then enc_list path else url.url_path)
+ ~param: (if url.url_param = [] then enc_list param else url.url_param)
+ ?query: (pass_if_missing url.url_query (enc query))
+ ?fragment: (pass_if_missing url.url_fragment (enc fragment))
+ ?other: (pass_if_missing url.url_other (enc other))
+ url.url_syntax
+;;
+
+
+let undefault_url
+ ?scheme
+ ?user
+ ?password
+ ?host
+ ?port
+ ?path
+ ?param
+ ?query
+ ?fragment
+ ?other
+ url
+ =
+
+ let remove_if_matching current arg =
+ match current with
+ None -> None
+ | Some x ->
+ (match arg with
+ None -> current
+ | Some x' ->
+ if x=x' then
+ None
+ else
+ current)
+ in
+
+ make_url
+ ~encoded: true
+ ?scheme: (remove_if_matching url.url_scheme scheme)
+ ?user: (remove_if_matching url.url_user user)
+ ?password: (remove_if_matching url.url_password password)
+ ?host: (remove_if_matching url.url_host host)
+ ?port: (remove_if_matching url.url_port port)
+ ~path: (match path with
+ None -> url.url_path
+ | Some x ->
+ if x = url.url_path then
+ []
+ else
+ url.url_path)
+ ~param: (match param with
+ None -> url.url_param
+ | Some x ->
+ if x = url.url_param then
+ []
+ else
+ url.url_param)
+ ?query: (remove_if_matching url.url_query query)
+ ?fragment: (remove_if_matching url.url_fragment fragment)
+ ?other: (remove_if_matching url.url_other other)
+ url.url_syntax
+;;
+
+
+let url_provides
+ ?(scheme = false)
+ ?(user = false)
+ ?(password = false)
+ ?(host = false)
+ ?(port = false)
+ ?(path = false)
+ ?(param = false)
+ ?(query = false)
+ ?(fragment = false)
+ ?(other = false)
+ url
+ =
+
+ (scheme => (url.url_scheme <> None)) &
+ (user => (url.url_user <> None)) &
+ (password => (url.url_password <> None)) &
+ (host => (url.url_host <> None)) &
+ (port => (url.url_port <> None)) &
+ (path => (url.url_path <> [])) &
+ (param => (url.url_param <> [])) &
+ (query => (url.url_query <> None)) &
+ (fragment => (url.url_fragment <> None)) &
+ (other => (url.url_other <> None))
+;;
+
+
+let return_if value =
+ match value with
+ None -> raise Not_found
+ | Some x -> x
+;;
+
+
+let decode_if want_encoded value =
+ let value' = return_if value in
+ if want_encoded then
+ value'
+ else
+ Netencoding.Url.decode value' (* WARNING: not thread-safe! *)
+;;
+
+
+let decode_path_if want_encoded value =
+ if want_encoded then
+ value
+ else
+ List.map Netencoding.Url.decode value (* WARNING: not thread-safe! *)
+;;
+
+
+let url_scheme url = return_if url.url_scheme;;
+let url_user ?(encoded=false) url = decode_if encoded url.url_user;;
+let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
+let url_host url = return_if url.url_host;;
+let url_port url = return_if url.url_port;;
+let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;;
+let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;;
+let url_query ?(encoded=false) url = decode_if encoded url.url_query;;
+let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
+let url_other ?(encoded=false) url = decode_if encoded url.url_other;;
+
+
+let string_of_url url =
+ if not (url.url_validity) then
+ failwith "Neturl.string_of_url: URL not flagged as valid";
+ (match url.url_scheme with
+ None -> ""
+ | Some s -> s ^ ":") ^
+ (match url.url_host with
+ None -> ""
+ | Some host ->
+ "//" ^
+ (match url.url_user with
+ None -> ""
+ | Some user ->
+ user ^
+ (match url.url_password with
+ None -> ""
+ | Some password ->
+ ":" ^ password
+ ) ^
+ "@") ^
+ host ^
+ (match url.url_port with
+ None -> ""
+ | Some port ->
+ ":" ^ string_of_int port)) ^
+ (match url.url_path with
+ | [""] ->
+ "/"
+ | x :: p when url.url_scheme = None &&
+ url.url_host = None &&
+ String.contains x ':'
+ ->
+ (* Really a special case: The colon contained in 'x' may cause
+ * that a prefix of 'x' is interpreted as URL scheme. In this
+ * case, "./" is prepended (as recommended in RFC 1808, 5.3).
+ *)
+ "./"
+ | _ ->
+ ""
+ ) ^
+ String.concat "/" url.url_path ^
+ (match url.url_other with
+ None -> ""
+ | Some other ->
+ other) ^
+ String.concat "" (List.map (fun s -> ";" ^ s) url.url_param) ^
+ (match url.url_query with
+ None -> ""
+ | Some query ->
+ "?" ^ query) ^
+ (match url.url_fragment with
+ None -> ""
+ | Some fragment ->
+ "#" ^ fragment)
+;;
+
+
+let url_of_string url_syntax s =
+ let l = String.length s in
+ let recognized x = x <> Url_part_not_recognized in
+
+ let rec collect_words terminators eof_char cats k =
+ (* Collect words as recognized by 'cats', starting at position 'k' in
+ * 's'. Collection stops if one the characters listed in 'terminators'
+ * is found. If the end of the string is reached, it is treated as
+ * 'eof_char'.
+ *)
+ let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ let word, sep =
+ String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
+ if List.mem sep terminators then
+ [word, sep], k'
+ else
+ let word_sep_list', k'' =
+ collect_words terminators eof_char cats (k'+1) in
+ ((word, sep) :: word_sep_list'), k''
+ in
+
+ (* Try to extract the scheme name: *)
+ let scheme, k1 =
+ if recognized url_syntax.url_enable_scheme then
+ try
+ let k = scan_url_part s 0 l scheme_cats false in
+ (* or raise Malformed_URL *)
+ if k = l then raise Malformed_URL;
+ assert (s.[k] = ':');
+ Some (String.sub s 0 k), (k+1)
+ with
+ Malformed_URL -> None, 0
+ else
+ None, 0
+ in
+
+ (* If there is a "//", a host will follow: *)
+ let host, port, user, password, k2 =
+ if recognized url_syntax.url_enable_host &&
+ k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin
+
+ let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2)
+ in
+ (* or raise Malformed_URL *)
+
+ let int x =
+ try int_of_string x with _ -> raise Malformed_URL in
+
+ match word_sep_list with
+ [ host, ('/'|'#') ] ->
+ Some host, None, None, None, k'
+ | [ host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), None, None, k'
+ | [ user, '@'; host, ('/'|'#') ] ->
+ Some host, None, Some user, None, k'
+ | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), Some user, None, k'
+ | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
+ Some host, None, Some user, Some password, k'
+ | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), Some user, Some password, k'
+ | _ ->
+ raise Malformed_URL
+ end
+ else
+ None, None, None, None, k1
+ in
+
+ let path, k3 =
+ if recognized url_syntax.url_enable_path &&
+ k2 < l (* && s.[k2]='/' *)
+ then begin
+ let cats = path_cats_from_syntax url_syntax [ '/' ] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
+ (* or raise Malformed_URL *)
+ match word_sep_list with
+ [ "", '/'; "", _ ] ->
+ [ "" ], k'
+ | [ "", _ ] ->
+ [], k'
+ | _ ->
+ List.map fst word_sep_list, k'
+ end
+ else begin
+ (* If there is a single '/': skip it *)
+ if not (recognized url_syntax.url_enable_other) &&
+ k2 < l && s.[k2]='/'
+ then
+ [], (k2+1)
+ else
+ [], k2
+ end
+ in
+
+ let other, k4 =
+ if recognized url_syntax.url_enable_other &&
+ k3 < l
+ then begin
+
+ let cats = other_cats_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ other, _ ] -> Some other, k'
+ | _ -> assert false
+ end
+ else
+ None, k3
+ in
+
+ let param, k5 =
+ if recognized url_syntax.url_enable_param &&
+ k4 < l && s.[k4]=';'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+ let seps' = List.filter (fun c -> c <> ';') seps in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
+ (* or raise Malformed_URL *)
+
+ List.map fst word_sep_list, k'
+ end
+ else
+ [], k4
+ in
+
+ let query, k6 =
+ if recognized url_syntax.url_enable_query &&
+ k5 < l && s.[k5]='?'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ query, _ ] -> Some query, k'
+ | _ -> assert false
+ end
+ else
+ None, k5
+ in
+
+ let fragment, k7 =
+ if recognized url_syntax.url_enable_fragment &&
+ k6 < l && s.[k6]='#'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ fragment, _ ] -> Some fragment, k'
+ | _ -> assert false
+ end
+ else
+ None, k6
+ in
+
+ if k7 <> l then raise Malformed_URL;
+
+ make_url
+ ~encoded:true
+ ?scheme:scheme
+ ?user:user
+ ?password:password
+ ?host:host
+ ?port:port
+ ~path:path
+ ~param:param
+ ?query:query
+ ?fragment:fragment
+ ?other:other
+ url_syntax
+;;
+
+
+let split_path s =
+ let l = String.length s in
+ let rec collect_words k =
+ let k' =
+ try
+ String.index_from s k '/'
+ with
+ Not_found -> l
+ in
+ let word = String.sub s k (k'-k) in
+ if k' >= l then
+ [word]
+ else
+ word :: collect_words (k'+1)
+ in
+ match collect_words 0 with
+ [ "" ] -> []
+ | [ "";"" ] -> [ "" ]
+ | other -> other
+;;
+
+
+let join_path l =
+ match l with
+ [ "" ] -> "/"
+ | _ -> String.concat "/" l;;
+
+
+let norm_path l =
+
+ let rec remove_slash_slash l first =
+ match l with
+ | [ "" ] ->
+ [ "" ]
+ | [ ""; "" ] when first ->
+ [ "" ]
+ | "" :: l' when not first ->
+ remove_slash_slash l' false
+ | x :: l' ->
+ x :: remove_slash_slash l' false
+ | [] ->
+ []
+ in
+
+ let rec remove_dot l first =
+ match l with
+ | ([ "." ] | ["."; ""]) ->
+ if first then [] else [ "" ]
+ | "." :: x :: l' ->
+ remove_dot (x :: l') false
+ | x :: l' ->
+ x :: remove_dot l' false
+ | [] ->
+ []
+ in
+
+ let rec remove_dot_dot_once l first =
+ match l with
+ x :: ".." :: [] when x <> "" && x <> ".." && not first ->
+ [ "" ]
+ | x :: ".." :: l' when x <> "" && x <> ".." ->
+ l'
+ | x :: l' ->
+ x :: remove_dot_dot_once l' false
+ | [] ->
+ raise Not_found
+ in
+
+ let rec remove_dot_dot l =
+ try
+ let l' = remove_dot_dot_once l true in
+ remove_dot_dot l'
+ with
+ Not_found -> l
+ in
+
+ let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
+ match l' with
+ [".."] -> [".."; ""]
+ | ["";""] -> [ "" ]
+ | _ -> l'
+;;
+
+
+let apply_relative_url baseurl relurl =
+ if not (baseurl.url_validity) or not (relurl.url_validity) then
+ failwith "Neturl.apply_relative_url: URL not flagged as valid";
+
+ if relurl.url_scheme <> None then
+ modify_url
+ ~syntax:baseurl.url_syntax (* inherit syntax *)
+ relurl
+ else
+ if relurl.url_host <> None then
+ modify_url
+ ~syntax:baseurl.url_syntax (* inherit syntax and scheme *)
+ ?scheme:baseurl.url_scheme
+ relurl
+ else
+ match relurl.url_path with
+ "" :: other ->
+ (* An absolute path *)
+ modify_url
+ ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
+ ~encoded:true
+ ?scheme:baseurl.url_scheme (* login info *)
+ ?host:baseurl.url_host
+ ?port:baseurl.url_port
+ ?user:baseurl.url_user
+ ?password:baseurl.url_password
+ relurl
+ | [] ->
+ (* Empty: Inherit also path, params, query, and fragment *)
+ let new_params, new_query, new_fragment =
+ match relurl.url_param, relurl.url_query, relurl.url_fragment
+ with
+ [], None, None ->
+ (* Inherit all three *)
+ baseurl.url_param, baseurl.url_query, baseurl.url_fragment
+ | [], None, f ->
+ (* Inherit params and query *)
+ baseurl.url_param, baseurl.url_query, f
+ | [], q, f ->
+ (* Inherit params *)
+ baseurl.url_param, q, f
+ | p, q, f ->
+ (* Inherit none of them *)
+ p, q, f
+ in
+ modify_url
+ ~syntax:baseurl.url_syntax
+ ~encoded:true
+ ?scheme:baseurl.url_scheme
+ ?host:baseurl.url_host
+ ?port:baseurl.url_port
+ ?user:baseurl.url_user
+ ?password:baseurl.url_password
+ ~path:baseurl.url_path
+ ~param:new_params
+ ?query:new_query
+ ?fragment:new_fragment
+ relurl
+ | relpath ->
+ (* A relative path *)
+ let rec change_path basepath =
+ match basepath with
+ | [] ->
+ relpath
+ | [ x ] ->
+ relpath
+ | x :: basepath' ->
+ x :: change_path basepath'
+ in
+ let new_path = norm_path (change_path baseurl.url_path) in
+ modify_url
+ ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
+ ~encoded:true
+ ?scheme:baseurl.url_scheme (* login info *)
+ ?host:baseurl.url_host
+ ?port:baseurl.url_port
+ ?user:baseurl.url_user
+ ?password:baseurl.url_password
+ ~path:new_path (* and change path *)
+ relurl
+
+;;
+
+
+let print_url url =
+ Format.print_string ("<URL:" ^ string_of_url url ^ ">")
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/07/04 21:50:51 gerd
+ * Fixed typo.
+ *
+ * Revision 1.3 2000/06/26 22:57:49 gerd
+ * Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2 2000/06/25 19:39:48 gerd
+ * Lots of Bugfixes.
+ *
+ * Revision 1.1 2000/06/24 20:19:59 gerd
+ * Initial revision.
+ *
+ *
+ *)