(* $Id$ * ---------------------------------------------------------------------- * *) exception Malformed_URL type url_syntax_option = Url_part_not_recognized | Url_part_allowed | Url_part_required type url_syntax = { url_enable_scheme : url_syntax_option; url_enable_user : url_syntax_option; url_enable_password : url_syntax_option; url_enable_host : url_syntax_option; url_enable_port : url_syntax_option; url_enable_path : url_syntax_option; url_enable_param : url_syntax_option; url_enable_query : url_syntax_option; url_enable_fragment : url_syntax_option; url_enable_other : url_syntax_option; url_accepts_8bits : bool; url_is_valid : url -> bool; } and url = { url_syntax : url_syntax; mutable url_validity : bool; url_scheme : string option; url_user : string option; url_password : string option; url_host : string option; url_port : int option; url_path : string list; url_param : string list; url_query : string option; url_fragment : string option; url_other : string option; } ;; type char_category = Accepted | Rejected | Separator let scan_url_part s k_from k_to cats accept_8bits = (* Scans the longest word of accepted characters from position 'k_from' * in 's' until at most position 'k_to'. The character following the * word (if any) must be a separator character. * On success, the function returns the position of the last character * of the word + 1. * If there is any rejected character before the separator or the end * of the string (i.e. position 'k_to') is reached, the exception * Malformed_URL is raised. * Furthermore, if the character '%' is accepted it is checked whether * two hexadecimal digits follow (which must be accepted, too). If this * is not true, the exception Malformed_URL is raised, too. * 'cats': contains for every character code (0 to 255) the category * of the character. *) let check_hex c = if cats.( Char.code c ) <> Accepted then raise Malformed_URL; match c with ('0'..'9'|'A'..'F'|'a'..'f') -> () | _ -> raise Malformed_URL in let rec scan k = if k >= k_to then k else begin let c = s.[k] in let cat = cats.(Char.code c) in match cat with Accepted -> if c = '%' then begin if k+2 >= k_to then raise Malformed_URL; let c1 = s.[k+1] in let c2 = s.[k+2] in check_hex c1; check_hex c2; scan (k+3) end else scan (k+1) | Separator -> k | Rejected -> if accept_8bits && c >= '\128' then scan (k+1) else raise Malformed_URL end in assert (Array.length cats = 256); assert (k_from >= 0); assert (k_from <= k_to); assert (k_to <= String.length s); scan k_from ;; (* Create a categorization: *) let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ] let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ] let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ] let safe = [ '$'; '-'; '_'; '.'; '+' ] let extra = [ '!'; '*'; '\''; '('; ')'; ',' ] let make_cats accepted separators = (* create a categorization: * - All characters listed in 'separators' are separators. * - All characters listed in 'accepted' and which do not occur in * 'separators' are accepted characters. * - All other characters are rejected. *) let cats = Array.make 256 Rejected in List.iter (fun c -> cats.(Char.code c) <- Accepted ) accepted; List.iter (fun c -> cats.(Char.code c) <- Separator ) separators; cats ;; let scheme_cats = make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;; (* scheme_cats: character categorization to _extract_ the URL scheme *) let login_cats = make_cats (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%']) [':'; '@'; '/'; '#' ] ;; (* login_cats: character categorization to _extract_ user name, password, * host name, and port. *) let host_cats = make_cats (lalpha @ ualpha @ digit @ ['.'; '-']) [] ;; (* host_cats: character categorization to _check_ whether the host name * is formed only by legal characters. * Especially '%' is not allowed here! *) let port_cats = make_cats digit [] ;; (* port_cats: character categorization to _check_ whether the port number * is formed only by legal characters. * Especially '%' is not allowed here! *) let path_cats separators = make_cats (lalpha @ ualpha @ digit @ safe @ extra @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~']) separators ;; let separators_from_syntax syn = let include_if syn_option clist = if syn_option <> Url_part_not_recognized then clist else [] in (include_if syn.url_enable_param [';']) @ (include_if syn.url_enable_query ['?']) @ (include_if syn.url_enable_fragment ['#']) ;; let path_cats_from_syntax syn extraseps = let separators = separators_from_syntax syn in path_cats (separators @ extraseps) ;; (* path_cats_from_syntax: * Computes a character categorization to extract the path from an URL. * This depends on the syntax because the list of possible separators * contains the characters that may begin the next URL clause. * * Notes: * - The '#' is rejected unless fragments are enabled. * - The '~' is accepted although this violates RFC 1738. *) let other_cats_from_syntax syn = let include_if syn_option clist = if syn_option <> Url_part_not_recognized then clist else [] in let separators = (include_if syn.url_enable_param [';']) @ (include_if syn.url_enable_query ['?']) @ (include_if syn.url_enable_fragment ['#']) in make_cats (lalpha @ ualpha @ digit @ safe @ extra @ (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'])) [] ;; (* other_cats: character categorization to extract or check the * "other" part of the URL. *) let extract_url_scheme s = let l = String.length s in let k = scan_url_part s 0 l scheme_cats false in (* or raise Malformed_URL *) if k = l then raise Malformed_URL; assert (s.[k] = ':'); String.lowercase(String.sub s 0 k) ;; let ( => ) a b = not a or b;; (* implication *) let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *) let url_syntax_is_valid syn = let recognized x = x <> Url_part_not_recognized in let not_recognized x = x = Url_part_not_recognized in (recognized syn.url_enable_password => recognized syn.url_enable_user) & (recognized syn.url_enable_port => recognized syn.url_enable_host) & (recognized syn.url_enable_user => recognized syn.url_enable_host) & not ( (recognized syn.url_enable_user || recognized syn.url_enable_password || recognized syn.url_enable_host || recognized syn.url_enable_port || recognized syn.url_enable_path) && (recognized syn.url_enable_other)) ;; let partial_url_syntax syn = let weaken = function Url_part_not_recognized -> Url_part_not_recognized | Url_part_allowed -> Url_part_allowed | Url_part_required -> Url_part_allowed in { url_enable_scheme = weaken syn.url_enable_scheme; url_enable_user = weaken syn.url_enable_user; url_enable_password = weaken syn.url_enable_password; url_enable_host = weaken syn.url_enable_host; url_enable_port = weaken syn.url_enable_port; url_enable_path = weaken syn.url_enable_path; url_enable_param = weaken syn.url_enable_param; url_enable_query = weaken syn.url_enable_query; url_enable_fragment = weaken syn.url_enable_fragment; url_enable_other = weaken syn.url_enable_other; url_accepts_8bits = syn.url_accepts_8bits; url_is_valid = syn.url_is_valid; } ;; let file_url_syntax = { url_enable_scheme = Url_part_required; url_enable_user = Url_part_not_recognized; url_enable_password = Url_part_not_recognized; url_enable_host = Url_part_allowed; url_enable_port = Url_part_not_recognized; url_enable_path = Url_part_required; url_enable_param = Url_part_not_recognized; url_enable_query = Url_part_not_recognized; url_enable_fragment = Url_part_not_recognized; url_enable_other = Url_part_not_recognized; url_accepts_8bits = false; url_is_valid = (fun _ -> true); } ;; let ftp_url_syntax = { url_enable_scheme = Url_part_required; url_enable_user = Url_part_allowed; url_enable_password = Url_part_allowed; url_enable_host = Url_part_required; url_enable_port = Url_part_allowed; url_enable_path = Url_part_allowed; url_enable_param = Url_part_allowed; url_enable_query = Url_part_not_recognized; url_enable_fragment = Url_part_not_recognized; url_enable_other = Url_part_not_recognized; url_accepts_8bits = false; url_is_valid = (fun _ -> true); } ;; let http_url_syntax = { url_enable_scheme = Url_part_required; url_enable_user = Url_part_allowed; url_enable_password = Url_part_allowed; url_enable_host = Url_part_required; url_enable_port = Url_part_allowed; url_enable_path = Url_part_allowed; url_enable_param = Url_part_not_recognized; url_enable_query = Url_part_allowed; url_enable_fragment = Url_part_not_recognized; url_enable_other = Url_part_not_recognized; url_accepts_8bits = false; url_is_valid = (fun _ -> true); } ;; let mailto_url_syntax = { url_enable_scheme = Url_part_required; url_enable_user = Url_part_not_recognized; url_enable_password = Url_part_not_recognized; url_enable_host = Url_part_not_recognized; url_enable_port = Url_part_not_recognized; url_enable_path = Url_part_not_recognized; url_enable_param = Url_part_not_recognized; url_enable_query = Url_part_not_recognized; url_enable_fragment = Url_part_not_recognized; url_enable_other = Url_part_required; url_accepts_8bits = false; url_is_valid = (fun _ -> true); } ;; let null_url_syntax = { url_enable_scheme = Url_part_not_recognized; url_enable_user = Url_part_not_recognized; url_enable_password = Url_part_not_recognized; url_enable_host = Url_part_not_recognized; url_enable_port = Url_part_not_recognized; url_enable_path = Url_part_not_recognized; url_enable_param = Url_part_not_recognized; url_enable_query = Url_part_not_recognized; url_enable_fragment = Url_part_not_recognized; url_enable_other = Url_part_not_recognized; url_accepts_8bits = false; url_is_valid = (fun _ -> true); } ;; let ip_url_syntax = { url_enable_scheme = Url_part_allowed; url_enable_user = Url_part_allowed; url_enable_password = Url_part_allowed; url_enable_host = Url_part_allowed; url_enable_port = Url_part_allowed; url_enable_path = Url_part_allowed; url_enable_param = Url_part_allowed; url_enable_query = Url_part_allowed; url_enable_fragment = Url_part_allowed; url_enable_other = Url_part_not_recognized; url_accepts_8bits = false; url_is_valid = (fun _ -> true); } ;; let common_url_syntax = let h = Hashtbl.create 10 in Hashtbl.add h "file" file_url_syntax; Hashtbl.add h "ftp" ftp_url_syntax; Hashtbl.add h "http" http_url_syntax; Hashtbl.add h "mailto" mailto_url_syntax; h ;; let url_conforms_to_syntax url = let recognized x = x <> Url_part_not_recognized in let required x = x = Url_part_required in let present x = x <> None in let syn = url.url_syntax in (present url.url_scheme => recognized syn.url_enable_scheme) & (present url.url_user => recognized syn.url_enable_user) & (present url.url_password => recognized syn.url_enable_password) & (present url.url_host => recognized syn.url_enable_host) & (present url.url_port => recognized syn.url_enable_port) & ((url.url_path <> []) => recognized syn.url_enable_path) & ((url.url_param <> []) => recognized syn.url_enable_param) & (present url.url_query => recognized syn.url_enable_query) & (present url.url_fragment => recognized syn.url_enable_fragment) & (present url.url_other => recognized syn.url_enable_other) & (required syn.url_enable_scheme => present url.url_scheme) & (required syn.url_enable_user => present url.url_user) & (required syn.url_enable_password => present url.url_password) & (required syn.url_enable_host => present url.url_host) & (required syn.url_enable_port => present url.url_port) & (required syn.url_enable_path => (url.url_path <> [])) & (required syn.url_enable_param => (url.url_param <> [])) & (required syn.url_enable_query => present url.url_query) & (required syn.url_enable_fragment => present url.url_fragment) & (required syn.url_enable_other => present url.url_other) & (url.url_validity or syn.url_is_valid url) ;; let url_syntax_of_url url = url.url_syntax ;; let modify_url ?syntax ?(encoded = false) ?scheme ?user ?password ?host ?port ?path ?param ?query ?fragment ?other url = let encode = Netencoding.Url.encode in let enc x = if encoded then x else match x with None -> None | Some x' -> Some (encode x') in let enc_list l = if encoded then l else List.map encode l in let new_syntax = match syntax with None -> url.url_syntax | Some syn -> syn in let check_string s_opt cats = match s_opt with None -> () | Some s -> let l = String.length s in let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in (* or raise Malformed_URL *) if k <> l then raise Malformed_URL in let check_string_list p cats sep = List.iter (fun p_component -> let l = String.length p_component in let k = scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in (* or raise Malformed_URL *) if k <> l then raise Malformed_URL; if String.contains p_component sep then raise Malformed_URL; ) p in (* Create the modified record: *) let url' = { url_syntax = new_syntax; url_validity = false; url_scheme = if scheme = None then url.url_scheme else scheme; url_user = if user = None then url.url_user else enc user; url_password = if password = None then url.url_password else enc password; url_host = if host = None then url.url_host else host; url_port = if port = None then url.url_port else port; url_path = (match path with None -> url.url_path | Some p -> enc_list p); url_param = (match param with None -> url.url_param | Some p -> enc_list p); url_query = if query = None then url.url_query else enc query; url_fragment = if fragment = None then url.url_fragment else enc fragment; url_other = if other = None then url.url_other else enc other; } in (* Check whether the URL conforms to the syntax: *) if not (url_conforms_to_syntax url') then raise Malformed_URL; if url'.url_password <> None && url'.url_user = None then raise Malformed_URL; if url'.url_user <> None && url'.url_host = None then raise Malformed_URL; if url'.url_port <> None && url'.url_host = None then raise Malformed_URL; (* Check every part: *) check_string url'.url_scheme scheme_cats; check_string url'.url_user login_cats; check_string url'.url_password login_cats; check_string url'.url_host host_cats; (match url'.url_port with None -> () | Some p -> if p < 0 || p > 65535 then raise Malformed_URL ); let path_cats = path_cats_from_syntax new_syntax [] in let other_cats = other_cats_from_syntax new_syntax in check_string url'.url_query path_cats; check_string url'.url_fragment path_cats; check_string url'.url_other other_cats; (* Check the lists: *) check_string_list url'.url_param path_cats ';'; check_string_list url'.url_path path_cats '/'; (* Further path checks: *) begin match url'.url_path with [] -> (* The path is empty: There must not be a 'param' or 'query' *) if url'.url_host <> None then begin if url'.url_param <> [] then raise Malformed_URL; if url'.url_query <> None then raise Malformed_URL; end | ["";""] -> (* This is illegal. *) raise Malformed_URL; | "" :: p' -> (* The path is absolute: always ok *) () | _ -> (* The path is relative: there must not be a host *) if url'.url_host <> None then raise Malformed_URL; end; begin match url'.url_path with _ :: rest -> (* "//" ambiguity *) begin match List.rev rest with _ :: rest' -> if List.exists (fun p -> p = "") rest' then raise Malformed_URL; | [] -> () end | [] -> () end; (* Cache that the URL is valid: *) url'.url_validity <- true; url' ;; let null_url = { url_syntax = null_url_syntax; url_validity = true; url_scheme = None; url_user = None; url_password = None; url_host = None; url_port = None; url_path = []; url_param = []; url_query = None; url_fragment = None; url_other = None; } ;; let make_url ?(encoded = false) ?scheme ?user ?password ?host ?port ?path ?param ?query ?fragment ?other url_syntax = if not (url_syntax_is_valid url_syntax) then invalid_arg "Neturl.make_url"; modify_url ~encoded:encoded ~syntax:url_syntax ?scheme:scheme ?user:user ?password:password ?host:host ?port:port ?path:path ?param:param ?query:query ?fragment:fragment ?other:other null_url ;; let remove_from_url ?(scheme = false) ?(user = false) ?(password = false) ?(host = false) ?(port = false) ?(path = false) ?(param = false) ?(query = false) ?(fragment = false) ?(other = false) url = make_url ~encoded: true ?scheme: (if scheme then None else url.url_scheme) ?user: (if user then None else url.url_user) ?password: (if password then None else url.url_password) ?host: (if host then None else url.url_host) ?port: (if port then None else url.url_port) ?path: (if path then None else Some url.url_path) ?param: (if param then None else Some url.url_param) ?query: (if query then None else url.url_query) ?fragment: (if fragment then None else url.url_fragment) ?other: (if other then None else url.url_other) url.url_syntax ;; let default_url ?(encoded = false) ?scheme ?user ?password ?host ?port ?(path = []) ?(param = []) ?query ?fragment ?other url = let encode = Netencoding.Url.encode in let enc x = if encoded then x else match x with None -> None | Some x' -> Some (encode x') in let enc_list l = if encoded then l else List.map encode l in let pass_if_missing current arg = match current with None -> arg | _ -> current in make_url ~encoded: true ?scheme: (pass_if_missing url.url_scheme scheme) ?user: (pass_if_missing url.url_user (enc user)) ?password: (pass_if_missing url.url_password (enc password)) ?host: (pass_if_missing url.url_host host) ?port: (pass_if_missing url.url_port port) ~path: (if url.url_path = [] then enc_list path else url.url_path) ~param: (if url.url_param = [] then enc_list param else url.url_param) ?query: (pass_if_missing url.url_query (enc query)) ?fragment: (pass_if_missing url.url_fragment (enc fragment)) ?other: (pass_if_missing url.url_other (enc other)) url.url_syntax ;; let undefault_url ?scheme ?user ?password ?host ?port ?path ?param ?query ?fragment ?other url = let remove_if_matching current arg = match current with None -> None | Some x -> (match arg with None -> current | Some x' -> if x=x' then None else current) in make_url ~encoded: true ?scheme: (remove_if_matching url.url_scheme scheme) ?user: (remove_if_matching url.url_user user) ?password: (remove_if_matching url.url_password password) ?host: (remove_if_matching url.url_host host) ?port: (remove_if_matching url.url_port port) ~path: (match path with None -> url.url_path | Some x -> if x = url.url_path then [] else url.url_path) ~param: (match param with None -> url.url_param | Some x -> if x = url.url_param then [] else url.url_param) ?query: (remove_if_matching url.url_query query) ?fragment: (remove_if_matching url.url_fragment fragment) ?other: (remove_if_matching url.url_other other) url.url_syntax ;; let url_provides ?(scheme = false) ?(user = false) ?(password = false) ?(host = false) ?(port = false) ?(path = false) ?(param = false) ?(query = false) ?(fragment = false) ?(other = false) url = (scheme => (url.url_scheme <> None)) & (user => (url.url_user <> None)) & (password => (url.url_password <> None)) & (host => (url.url_host <> None)) & (port => (url.url_port <> None)) & (path => (url.url_path <> [])) & (param => (url.url_param <> [])) & (query => (url.url_query <> None)) & (fragment => (url.url_fragment <> None)) & (other => (url.url_other <> None)) ;; let return_if value = match value with None -> raise Not_found | Some x -> x ;; let decode_if want_encoded value = let value' = return_if value in if want_encoded then value' else Netencoding.Url.decode value' (* WARNING: not thread-safe! *) ;; let decode_path_if want_encoded value = if want_encoded then value else List.map Netencoding.Url.decode value (* WARNING: not thread-safe! *) ;; let url_scheme url = return_if url.url_scheme;; let url_user ?(encoded=false) url = decode_if encoded url.url_user;; let url_password ?(encoded=false) url = decode_if encoded url.url_password;; let url_host url = return_if url.url_host;; let url_port url = return_if url.url_port;; let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;; let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;; let url_query ?(encoded=false) url = decode_if encoded url.url_query;; let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;; let url_other ?(encoded=false) url = decode_if encoded url.url_other;; let string_of_url url = if not (url.url_validity) then failwith "Neturl.string_of_url: URL not flagged as valid"; (match url.url_scheme with None -> "" | Some s -> s ^ ":") ^ (match url.url_host with None -> "" | Some host -> "//" ^ (match url.url_user with None -> "" | Some user -> user ^ (match url.url_password with None -> "" | Some password -> ":" ^ password ) ^ "@") ^ host ^ (match url.url_port with None -> "" | Some port -> ":" ^ string_of_int port)) ^ (match url.url_path with | [""] -> "/" | x :: p when url.url_scheme = None && url.url_host = None && String.contains x ':' -> (* Really a special case: The colon contained in 'x' may cause * that a prefix of 'x' is interpreted as URL scheme. In this * case, "./" is prepended (as recommended in RFC 1808, 5.3). *) "./" | _ -> "" ) ^ String.concat "/" url.url_path ^ (match url.url_other with None -> "" | Some other -> other) ^ String.concat "" (List.map (fun s -> ";" ^ s) url.url_param) ^ (match url.url_query with None -> "" | Some query -> "?" ^ query) ^ (match url.url_fragment with None -> "" | Some fragment -> "#" ^ fragment) ;; let url_of_string url_syntax s = let l = String.length s in let recognized x = x <> Url_part_not_recognized in let rec collect_words terminators eof_char cats k = (* Collect words as recognized by 'cats', starting at position 'k' in * 's'. Collection stops if one the characters listed in 'terminators' * is found. If the end of the string is reached, it is treated as * 'eof_char'. *) let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in (* or raise Malformed_URL *) let word, sep = String.sub s k (k'-k), (if k' None, 0 else None, 0 in (* If there is a "//", a host will follow: *) let host, port, user, password, k2 = if recognized url_syntax.url_enable_host && k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2) in (* or raise Malformed_URL *) let int x = try int_of_string x with _ -> raise Malformed_URL in match word_sep_list with [ host, ('/'|'#') ] -> Some host, None, None, None, k' | [ host, ':'; port, ('/'|'#') ] -> Some host, Some (int port), None, None, k' | [ user, '@'; host, ('/'|'#') ] -> Some host, None, Some user, None, k' | [ user, '@'; host, ':'; port, ('/'|'#') ] -> Some host, Some (int port), Some user, None, k' | [ user, ':'; password, '@'; host, ('/'|'#') ] -> Some host, None, Some user, Some password, k' | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] -> Some host, Some (int port), Some user, Some password, k' | _ -> raise Malformed_URL end else None, None, None, None, k1 in let path, k3 = if recognized url_syntax.url_enable_path && k2 < l (* && s.[k2]='/' *) then begin let cats = path_cats_from_syntax url_syntax [ '/' ] in let seps = separators_from_syntax url_syntax in (* Note: '>' is not allowed within URLs; because of this we can use * it as end-of-string character. *) let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in (* or raise Malformed_URL *) match word_sep_list with [ "", '/'; "", _ ] -> [ "" ], k' | [ "", _ ] -> [], k' | _ -> List.map fst word_sep_list, k' end else begin (* If there is a single '/': skip it *) if not (recognized url_syntax.url_enable_other) && k2 < l && s.[k2]='/' then [], (k2+1) else [], k2 end in let other, k4 = if recognized url_syntax.url_enable_other && k3 < l then begin let cats = other_cats_from_syntax url_syntax in (* Note: '>' is not allowed within URLs; because of this we can use * it as end-of-string character. *) let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in (* or raise Malformed_URL *) match word_sep_list with [ other, _ ] -> Some other, k' | _ -> assert false end else None, k3 in let param, k5 = if recognized url_syntax.url_enable_param && k4 < l && s.[k4]=';' then begin let cats = path_cats_from_syntax url_syntax [] in let seps = separators_from_syntax url_syntax in let seps' = List.filter (fun c -> c <> ';') seps in (* Note: '>' is not allowed within URLs; because of this we can use * it as end-of-string character. *) let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in (* or raise Malformed_URL *) List.map fst word_sep_list, k' end else [], k4 in let query, k6 = if recognized url_syntax.url_enable_query && k5 < l && s.[k5]='?' then begin let cats = path_cats_from_syntax url_syntax [] in let seps = separators_from_syntax url_syntax in (* Note: '>' is not allowed within URLs; because of this we can use * it as end-of-string character. *) let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in (* or raise Malformed_URL *) match word_sep_list with [ query, _ ] -> Some query, k' | _ -> assert false end else None, k5 in let fragment, k7 = if recognized url_syntax.url_enable_fragment && k6 < l && s.[k6]='#' then begin let cats = path_cats_from_syntax url_syntax [] in let seps = separators_from_syntax url_syntax in (* Note: '>' is not allowed within URLs; because of this we can use * it as end-of-string character. *) let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in (* or raise Malformed_URL *) match word_sep_list with [ fragment, _ ] -> Some fragment, k' | _ -> assert false end else None, k6 in if k7 <> l then raise Malformed_URL; make_url ~encoded:true ?scheme:scheme ?user:user ?password:password ?host:host ?port:port ~path:path ~param:param ?query:query ?fragment:fragment ?other:other url_syntax ;; let split_path s = let l = String.length s in let rec collect_words k = let k' = try String.index_from s k '/' with Not_found -> l in let word = String.sub s k (k'-k) in if k' >= l then [word] else word :: collect_words (k'+1) in match collect_words 0 with [ "" ] -> [] | [ "";"" ] -> [ "" ] | other -> other ;; let join_path l = match l with [ "" ] -> "/" | _ -> String.concat "/" l;; let norm_path l = let rec remove_slash_slash l first = match l with | [ "" ] -> [ "" ] | [ ""; "" ] when first -> [ "" ] | "" :: l' when not first -> remove_slash_slash l' false | x :: l' -> x :: remove_slash_slash l' false | [] -> [] in let rec remove_dot l first = match l with | ([ "." ] | ["."; ""]) -> if first then [] else [ "" ] | "." :: x :: l' -> remove_dot (x :: l') false | x :: l' -> x :: remove_dot l' false | [] -> [] in let rec remove_dot_dot_once l first = match l with x :: ".." :: [] when x <> "" && x <> ".." && not first -> [ "" ] | x :: ".." :: l' when x <> "" && x <> ".." -> l' | x :: l' -> x :: remove_dot_dot_once l' false | [] -> raise Not_found in let rec remove_dot_dot l = try let l' = remove_dot_dot_once l true in remove_dot_dot l' with Not_found -> l in let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in match l' with [".."] -> [".."; ""] | ["";""] -> [ "" ] | _ -> l' ;; let apply_relative_url baseurl relurl = if not (baseurl.url_validity) or not (relurl.url_validity) then failwith "Neturl.apply_relative_url: URL not flagged as valid"; if relurl.url_scheme <> None then modify_url ~syntax:baseurl.url_syntax (* inherit syntax *) relurl else if relurl.url_host <> None then modify_url ~syntax:baseurl.url_syntax (* inherit syntax and scheme *) ?scheme:baseurl.url_scheme relurl else match relurl.url_path with "" :: other -> (* An absolute path *) modify_url ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *) ~encoded:true ?scheme:baseurl.url_scheme (* login info *) ?host:baseurl.url_host ?port:baseurl.url_port ?user:baseurl.url_user ?password:baseurl.url_password relurl | [] -> (* Empty: Inherit also path, params, query, and fragment *) let new_params, new_query, new_fragment = match relurl.url_param, relurl.url_query, relurl.url_fragment with [], None, None -> (* Inherit all three *) baseurl.url_param, baseurl.url_query, baseurl.url_fragment | [], None, f -> (* Inherit params and query *) baseurl.url_param, baseurl.url_query, f | [], q, f -> (* Inherit params *) baseurl.url_param, q, f | p, q, f -> (* Inherit none of them *) p, q, f in modify_url ~syntax:baseurl.url_syntax ~encoded:true ?scheme:baseurl.url_scheme ?host:baseurl.url_host ?port:baseurl.url_port ?user:baseurl.url_user ?password:baseurl.url_password ~path:baseurl.url_path ~param:new_params ?query:new_query ?fragment:new_fragment relurl | relpath -> (* A relative path *) let rec change_path basepath = match basepath with | [] -> relpath | [ x ] -> relpath | x :: basepath' -> x :: change_path basepath' in let new_path = norm_path (change_path baseurl.url_path) in modify_url ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *) ~encoded:true ?scheme:baseurl.url_scheme (* login info *) ?host:baseurl.url_host ?port:baseurl.url_port ?user:baseurl.url_user ?password:baseurl.url_password ~path:new_path (* and change path *) relurl ;; let print_url url = Format.print_string ("") ;; (* ====================================================================== * History: * * $Log$ * Revision 1.1 2000/11/17 09:57:28 lpadovan * Initial revision * * Revision 1.4 2000/07/04 21:50:51 gerd * Fixed typo. * * Revision 1.3 2000/06/26 22:57:49 gerd * Change: The record 'url_syntax' has an additional component * 'url_accepts_8bits'. Setting this option to 'true' causes that * the bytes >= 0x80 are no longer rejected. * * Revision 1.2 2000/06/25 19:39:48 gerd * Lots of Bugfixes. * * Revision 1.1 2000/06/24 20:19:59 gerd * Initial revision. * * *)