X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fneturl.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fneturl.ml;h=0000000000000000000000000000000000000000;hb=e108abe5c0b4eb841c4ad332229a6c0e57e70079;hp=f597b0c1d9ea6145f295eca4da3e6ae1adddcc26;hpb=1456c337a60f6677ee742ff7891d43fc382359a9;p=helm.git diff --git a/helm/DEVEL/pxp/netstring/neturl.ml b/helm/DEVEL/pxp/netstring/neturl.ml deleted file mode 100644 index f597b0c1d..000000000 --- a/helm/DEVEL/pxp/netstring/neturl.ml +++ /dev/null @@ -1,1302 +0,0 @@ -(* $Id$ - * ---------------------------------------------------------------------- - * - *) - -exception Malformed_URL - -type url_syntax_option = - Url_part_not_recognized - | Url_part_allowed - | Url_part_required - - -type url_syntax = - { url_enable_scheme : url_syntax_option; - url_enable_user : url_syntax_option; - url_enable_password : url_syntax_option; - url_enable_host : url_syntax_option; - url_enable_port : url_syntax_option; - url_enable_path : url_syntax_option; - url_enable_param : url_syntax_option; - url_enable_query : url_syntax_option; - url_enable_fragment : url_syntax_option; - url_enable_other : url_syntax_option; - url_accepts_8bits : bool; - url_is_valid : url -> bool; - } - -and url = - { - url_syntax : url_syntax; - mutable url_validity : bool; - url_scheme : string option; - url_user : string option; - url_password : string option; - url_host : string option; - url_port : int option; - url_path : string list; - url_param : string list; - url_query : string option; - url_fragment : string option; - url_other : string option; - } -;; - - -type char_category = - Accepted - | Rejected - | Separator - - - -let scan_url_part s k_from k_to cats accept_8bits = - (* Scans the longest word of accepted characters from position 'k_from' - * in 's' until at most position 'k_to'. The character following the - * word (if any) must be a separator character. - * On success, the function returns the position of the last character - * of the word + 1. - * If there is any rejected character before the separator or the end - * of the string (i.e. position 'k_to') is reached, the exception - * Malformed_URL is raised. - * Furthermore, if the character '%' is accepted it is checked whether - * two hexadecimal digits follow (which must be accepted, too). If this - * is not true, the exception Malformed_URL is raised, too. - * 'cats': contains for every character code (0 to 255) the category - * of the character. - *) - let check_hex c = - if cats.( Char.code c ) <> Accepted then raise Malformed_URL; - match c with - ('0'..'9'|'A'..'F'|'a'..'f') -> () - | _ -> raise Malformed_URL - in - - let rec scan k = - if k >= k_to then - k - else begin - let c = s.[k] in - let cat = cats.(Char.code c) in - match cat with - Accepted -> - if c = '%' then begin - if k+2 >= k_to then raise Malformed_URL; - let c1 = s.[k+1] in - let c2 = s.[k+2] in - check_hex c1; - check_hex c2; - scan (k+3) - end - else - scan (k+1) - | Separator -> k - | Rejected -> - if accept_8bits && c >= '\128' - then scan (k+1) - else raise Malformed_URL - end - in - - assert (Array.length cats = 256); - assert (k_from >= 0); - assert (k_from <= k_to); - assert (k_to <= String.length s); - - scan k_from -;; - - -(* Create a categorization: *) - -let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; - 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ] - -let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; - 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ] - -let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ] - -let safe = [ '$'; '-'; '_'; '.'; '+' ] - -let extra = [ '!'; '*'; '\''; '('; ')'; ',' ] - -let make_cats accepted separators = - (* create a categorization: - * - All characters listed in 'separators' are separators. - * - All characters listed in 'accepted' and which do not occur in - * 'separators' are accepted characters. - * - All other characters are rejected. - *) - let cats = Array.make 256 Rejected in - List.iter - (fun c -> - cats.(Char.code c) <- Accepted - ) - accepted; - - List.iter - (fun c -> - cats.(Char.code c) <- Separator - ) - separators; - cats -;; - - -let scheme_cats = - make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;; - - (* scheme_cats: character categorization to _extract_ the URL scheme *) - - -let login_cats = - make_cats - (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%']) - [':'; '@'; '/'; '#' ] -;; - - (* login_cats: character categorization to _extract_ user name, password, - * host name, and port. - *) - -let host_cats = - make_cats - (lalpha @ ualpha @ digit @ ['.'; '-']) - [] -;; - - (* host_cats: character categorization to _check_ whether the host name - * is formed only by legal characters. - * Especially '%' is not allowed here! - *) - -let port_cats = - make_cats - digit - [] -;; - - (* port_cats: character categorization to _check_ whether the port number - * is formed only by legal characters. - * Especially '%' is not allowed here! - *) - -let path_cats separators = - make_cats - (lalpha @ ualpha @ digit @ safe @ extra @ - ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~']) - separators -;; - - -let separators_from_syntax syn = - let include_if syn_option clist = - if syn_option <> Url_part_not_recognized then - clist - else - [] - in - (include_if syn.url_enable_param [';']) @ - (include_if syn.url_enable_query ['?']) @ - (include_if syn.url_enable_fragment ['#']) -;; - - -let path_cats_from_syntax syn extraseps = - let separators = separators_from_syntax syn in - path_cats (separators @ extraseps) -;; - -(* path_cats_from_syntax: - * Computes a character categorization to extract the path from an URL. - * This depends on the syntax because the list of possible separators - * contains the characters that may begin the next URL clause. - * - * Notes: - * - The '#' is rejected unless fragments are enabled. - * - The '~' is accepted although this violates RFC 1738. - *) - - -let other_cats_from_syntax syn = - let include_if syn_option clist = - if syn_option <> Url_part_not_recognized then - clist - else - [] - in - let separators = - (include_if syn.url_enable_param [';']) @ - (include_if syn.url_enable_query ['?']) @ - (include_if syn.url_enable_fragment ['#']) - in - - make_cats - (lalpha @ ualpha @ digit @ safe @ extra @ - (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'])) - [] -;; - - (* other_cats: character categorization to extract or check the - * "other" part of the URL. - *) - - - -let extract_url_scheme s = - let l = String.length s in - let k = scan_url_part s 0 l scheme_cats false in - (* or raise Malformed_URL *) - if k = l then raise Malformed_URL; - assert (s.[k] = ':'); - String.lowercase(String.sub s 0 k) -;; - - -let ( => ) a b = not a or b;; (* implication *) - -let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *) - -let url_syntax_is_valid syn = - let recognized x = x <> Url_part_not_recognized in - let not_recognized x = x = Url_part_not_recognized in - (recognized syn.url_enable_password => recognized syn.url_enable_user) & - (recognized syn.url_enable_port => recognized syn.url_enable_host) & - (recognized syn.url_enable_user => recognized syn.url_enable_host) & - not ( (recognized syn.url_enable_user || - recognized syn.url_enable_password || - recognized syn.url_enable_host || - recognized syn.url_enable_port || - recognized syn.url_enable_path) && - (recognized syn.url_enable_other)) -;; - - -let partial_url_syntax syn = - let weaken = - function - Url_part_not_recognized -> Url_part_not_recognized - | Url_part_allowed -> Url_part_allowed - | Url_part_required -> Url_part_allowed - in - { url_enable_scheme = weaken syn.url_enable_scheme; - url_enable_user = weaken syn.url_enable_user; - url_enable_password = weaken syn.url_enable_password; - url_enable_host = weaken syn.url_enable_host; - url_enable_port = weaken syn.url_enable_port; - url_enable_path = weaken syn.url_enable_path; - url_enable_param = weaken syn.url_enable_param; - url_enable_query = weaken syn.url_enable_query; - url_enable_fragment = weaken syn.url_enable_fragment; - url_enable_other = weaken syn.url_enable_other; - url_accepts_8bits = syn.url_accepts_8bits; - url_is_valid = syn.url_is_valid; - } -;; - - - -let file_url_syntax = - { url_enable_scheme = Url_part_required; - url_enable_user = Url_part_not_recognized; - url_enable_password = Url_part_not_recognized; - url_enable_host = Url_part_allowed; - url_enable_port = Url_part_not_recognized; - url_enable_path = Url_part_required; - url_enable_param = Url_part_not_recognized; - url_enable_query = Url_part_not_recognized; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); - } -;; - - -let ftp_url_syntax = - { url_enable_scheme = Url_part_required; - url_enable_user = Url_part_allowed; - url_enable_password = Url_part_allowed; - url_enable_host = Url_part_required; - url_enable_port = Url_part_allowed; - url_enable_path = Url_part_allowed; - url_enable_param = Url_part_allowed; - url_enable_query = Url_part_not_recognized; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); - } -;; - - -let http_url_syntax = - { url_enable_scheme = Url_part_required; - url_enable_user = Url_part_allowed; - url_enable_password = Url_part_allowed; - url_enable_host = Url_part_required; - url_enable_port = Url_part_allowed; - url_enable_path = Url_part_allowed; - url_enable_param = Url_part_not_recognized; - url_enable_query = Url_part_allowed; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); - } -;; - - -let mailto_url_syntax = - { url_enable_scheme = Url_part_required; - url_enable_user = Url_part_not_recognized; - url_enable_password = Url_part_not_recognized; - url_enable_host = Url_part_not_recognized; - url_enable_port = Url_part_not_recognized; - url_enable_path = Url_part_not_recognized; - url_enable_param = Url_part_not_recognized; - url_enable_query = Url_part_not_recognized; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_required; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); - } -;; - - -let null_url_syntax = - { url_enable_scheme = Url_part_not_recognized; - url_enable_user = Url_part_not_recognized; - url_enable_password = Url_part_not_recognized; - url_enable_host = Url_part_not_recognized; - url_enable_port = Url_part_not_recognized; - url_enable_path = Url_part_not_recognized; - url_enable_param = Url_part_not_recognized; - url_enable_query = Url_part_not_recognized; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); - } -;; - - -let ip_url_syntax = - { url_enable_scheme = Url_part_allowed; - url_enable_user = Url_part_allowed; - url_enable_password = Url_part_allowed; - url_enable_host = Url_part_allowed; - url_enable_port = Url_part_allowed; - url_enable_path = Url_part_allowed; - url_enable_param = Url_part_allowed; - url_enable_query = Url_part_allowed; - url_enable_fragment = Url_part_allowed; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); - } -;; - - -let common_url_syntax = - let h = Hashtbl.create 10 in - Hashtbl.add h "file" file_url_syntax; - Hashtbl.add h "ftp" ftp_url_syntax; - Hashtbl.add h "http" http_url_syntax; - Hashtbl.add h "mailto" mailto_url_syntax; - h -;; - - -let url_conforms_to_syntax url = - let recognized x = x <> Url_part_not_recognized in - let required x = x = Url_part_required in - let present x = x <> None in - let syn = url.url_syntax in - (present url.url_scheme => recognized syn.url_enable_scheme) & - (present url.url_user => recognized syn.url_enable_user) & - (present url.url_password => recognized syn.url_enable_password) & - (present url.url_host => recognized syn.url_enable_host) & - (present url.url_port => recognized syn.url_enable_port) & - ((url.url_path <> []) => recognized syn.url_enable_path) & - ((url.url_param <> []) => recognized syn.url_enable_param) & - (present url.url_query => recognized syn.url_enable_query) & - (present url.url_fragment => recognized syn.url_enable_fragment) & - (present url.url_other => recognized syn.url_enable_other) & - (required syn.url_enable_scheme => present url.url_scheme) & - (required syn.url_enable_user => present url.url_user) & - (required syn.url_enable_password => present url.url_password) & - (required syn.url_enable_host => present url.url_host) & - (required syn.url_enable_port => present url.url_port) & - (required syn.url_enable_path => (url.url_path <> [])) & - (required syn.url_enable_param => (url.url_param <> [])) & - (required syn.url_enable_query => present url.url_query) & - (required syn.url_enable_fragment => present url.url_fragment) & - (required syn.url_enable_other => present url.url_other) & - (url.url_validity or syn.url_is_valid url) -;; - - -let url_syntax_of_url url = url.url_syntax -;; - - -let modify_url - ?syntax - ?(encoded = false) - ?scheme - ?user - ?password - ?host - ?port - ?path - ?param - ?query - ?fragment - ?other - url - = - - let encode = Netencoding.Url.encode in - let enc x = - if encoded then - x - else - match x with - None -> None - | Some x' -> Some (encode x') - in - let enc_list l = - if encoded then - l - else - List.map encode l - in - - let new_syntax = - match syntax with - None -> url.url_syntax - | Some syn -> syn - in - - let check_string s_opt cats = - match s_opt with - None -> () - | Some s -> - let l = String.length s in - let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in - (* or raise Malformed_URL *) - if k <> l then raise Malformed_URL - in - - let check_string_list p cats sep = - List.iter - (fun p_component -> - let l = String.length p_component in - let k = - scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in - (* or raise Malformed_URL *) - if k <> l then raise Malformed_URL; - if String.contains p_component sep then raise Malformed_URL; - ) - p - in - - (* Create the modified record: *) - let url' = - { - url_syntax = new_syntax; - url_validity = false; - url_scheme = if scheme = None then url.url_scheme else scheme; - url_user = if user = None then url.url_user else enc user; - url_password = if password = None then url.url_password else enc password; - url_host = if host = None then url.url_host else host; - url_port = if port = None then url.url_port else port; - url_path = (match path with - None -> url.url_path - | Some p -> enc_list p); - url_param = (match param with - None -> url.url_param - | Some p -> enc_list p); - url_query = if query = None then url.url_query else enc query; - url_fragment = if fragment = None then url.url_fragment else enc fragment; - url_other = if other = None then url.url_other else enc other; - } - in - (* Check whether the URL conforms to the syntax: - *) - if not (url_conforms_to_syntax url') then raise Malformed_URL; - if url'.url_password <> None && url'.url_user = None then raise Malformed_URL; - if url'.url_user <> None && url'.url_host = None then raise Malformed_URL; - if url'.url_port <> None && url'.url_host = None then raise Malformed_URL; - (* Check every part: *) - check_string url'.url_scheme scheme_cats; - check_string url'.url_user login_cats; - check_string url'.url_password login_cats; - check_string url'.url_host host_cats; - (match url'.url_port with - None -> () - | Some p -> if p < 0 || p > 65535 then raise Malformed_URL - ); - let path_cats = path_cats_from_syntax new_syntax [] in - let other_cats = other_cats_from_syntax new_syntax in - check_string url'.url_query path_cats; - check_string url'.url_fragment path_cats; - check_string url'.url_other other_cats; - (* Check the lists: *) - check_string_list url'.url_param path_cats ';'; - check_string_list url'.url_path path_cats '/'; - (* Further path checks: *) - begin match url'.url_path with - [] -> - (* The path is empty: There must not be a 'param' or 'query' *) - if url'.url_host <> None then begin - if url'.url_param <> [] then raise Malformed_URL; - if url'.url_query <> None then raise Malformed_URL; - end - | ["";""] -> - (* This is illegal. *) - raise Malformed_URL; - | "" :: p' -> - (* The path is absolute: always ok *) - () - | _ -> - (* The path is relative: there must not be a host *) - if url'.url_host <> None then raise Malformed_URL; - end; - begin match url'.url_path with - _ :: rest -> (* "//" ambiguity *) - begin match List.rev rest with - _ :: rest' -> - if List.exists (fun p -> p = "") rest' then - raise Malformed_URL; - | [] -> - () - end - | [] -> - () - end; - (* Cache that the URL is valid: *) - url'.url_validity <- true; - - url' -;; - - -let null_url = - { - url_syntax = null_url_syntax; - url_validity = true; - url_scheme = None; - url_user = None; - url_password = None; - url_host = None; - url_port = None; - url_path = []; - url_param = []; - url_query = None; - url_fragment = None; - url_other = None; - } -;; - - -let make_url - ?(encoded = false) - ?scheme - ?user - ?password - ?host - ?port - ?path - ?param - ?query - ?fragment - ?other - url_syntax - = - - if not (url_syntax_is_valid url_syntax) then - invalid_arg "Neturl.make_url"; - - modify_url - ~encoded:encoded - ~syntax:url_syntax - ?scheme:scheme - ?user:user - ?password:password - ?host:host - ?port:port - ?path:path - ?param:param - ?query:query - ?fragment:fragment - ?other:other - null_url -;; - - -let remove_from_url - ?(scheme = false) - ?(user = false) - ?(password = false) - ?(host = false) - ?(port = false) - ?(path = false) - ?(param = false) - ?(query = false) - ?(fragment = false) - ?(other = false) - url - = - - make_url - ~encoded: true - ?scheme: (if scheme then None else url.url_scheme) - ?user: (if user then None else url.url_user) - ?password: (if password then None else url.url_password) - ?host: (if host then None else url.url_host) - ?port: (if port then None else url.url_port) - ?path: (if path then None else Some url.url_path) - ?param: (if param then None else Some url.url_param) - ?query: (if query then None else url.url_query) - ?fragment: (if fragment then None else url.url_fragment) - ?other: (if other then None else url.url_other) - url.url_syntax -;; - - -let default_url - ?(encoded = false) - ?scheme - ?user - ?password - ?host - ?port - ?(path = []) - ?(param = []) - ?query - ?fragment - ?other - url - = - - let encode = Netencoding.Url.encode in - - let enc x = - if encoded then - x - else - match x with - None -> None - | Some x' -> Some (encode x') - in - - let enc_list l = - if encoded then - l - else - List.map encode l - in - - let pass_if_missing current arg = - match current with - None -> arg - | _ -> current - in - - make_url - ~encoded: true - ?scheme: (pass_if_missing url.url_scheme scheme) - ?user: (pass_if_missing url.url_user (enc user)) - ?password: (pass_if_missing url.url_password (enc password)) - ?host: (pass_if_missing url.url_host host) - ?port: (pass_if_missing url.url_port port) - ~path: (if url.url_path = [] then enc_list path else url.url_path) - ~param: (if url.url_param = [] then enc_list param else url.url_param) - ?query: (pass_if_missing url.url_query (enc query)) - ?fragment: (pass_if_missing url.url_fragment (enc fragment)) - ?other: (pass_if_missing url.url_other (enc other)) - url.url_syntax -;; - - -let undefault_url - ?scheme - ?user - ?password - ?host - ?port - ?path - ?param - ?query - ?fragment - ?other - url - = - - let remove_if_matching current arg = - match current with - None -> None - | Some x -> - (match arg with - None -> current - | Some x' -> - if x=x' then - None - else - current) - in - - make_url - ~encoded: true - ?scheme: (remove_if_matching url.url_scheme scheme) - ?user: (remove_if_matching url.url_user user) - ?password: (remove_if_matching url.url_password password) - ?host: (remove_if_matching url.url_host host) - ?port: (remove_if_matching url.url_port port) - ~path: (match path with - None -> url.url_path - | Some x -> - if x = url.url_path then - [] - else - url.url_path) - ~param: (match param with - None -> url.url_param - | Some x -> - if x = url.url_param then - [] - else - url.url_param) - ?query: (remove_if_matching url.url_query query) - ?fragment: (remove_if_matching url.url_fragment fragment) - ?other: (remove_if_matching url.url_other other) - url.url_syntax -;; - - -let url_provides - ?(scheme = false) - ?(user = false) - ?(password = false) - ?(host = false) - ?(port = false) - ?(path = false) - ?(param = false) - ?(query = false) - ?(fragment = false) - ?(other = false) - url - = - - (scheme => (url.url_scheme <> None)) & - (user => (url.url_user <> None)) & - (password => (url.url_password <> None)) & - (host => (url.url_host <> None)) & - (port => (url.url_port <> None)) & - (path => (url.url_path <> [])) & - (param => (url.url_param <> [])) & - (query => (url.url_query <> None)) & - (fragment => (url.url_fragment <> None)) & - (other => (url.url_other <> None)) -;; - - -let return_if value = - match value with - None -> raise Not_found - | Some x -> x -;; - - -let decode_if want_encoded value = - let value' = return_if value in - if want_encoded then - value' - else - Netencoding.Url.decode value' (* WARNING: not thread-safe! *) -;; - - -let decode_path_if want_encoded value = - if want_encoded then - value - else - List.map Netencoding.Url.decode value (* WARNING: not thread-safe! *) -;; - - -let url_scheme url = return_if url.url_scheme;; -let url_user ?(encoded=false) url = decode_if encoded url.url_user;; -let url_password ?(encoded=false) url = decode_if encoded url.url_password;; -let url_host url = return_if url.url_host;; -let url_port url = return_if url.url_port;; -let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;; -let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;; -let url_query ?(encoded=false) url = decode_if encoded url.url_query;; -let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;; -let url_other ?(encoded=false) url = decode_if encoded url.url_other;; - - -let string_of_url url = - if not (url.url_validity) then - failwith "Neturl.string_of_url: URL not flagged as valid"; - (match url.url_scheme with - None -> "" - | Some s -> s ^ ":") ^ - (match url.url_host with - None -> "" - | Some host -> - "//" ^ - (match url.url_user with - None -> "" - | Some user -> - user ^ - (match url.url_password with - None -> "" - | Some password -> - ":" ^ password - ) ^ - "@") ^ - host ^ - (match url.url_port with - None -> "" - | Some port -> - ":" ^ string_of_int port)) ^ - (match url.url_path with - | [""] -> - "/" - | x :: p when url.url_scheme = None && - url.url_host = None && - String.contains x ':' - -> - (* Really a special case: The colon contained in 'x' may cause - * that a prefix of 'x' is interpreted as URL scheme. In this - * case, "./" is prepended (as recommended in RFC 1808, 5.3). - *) - "./" - | _ -> - "" - ) ^ - String.concat "/" url.url_path ^ - (match url.url_other with - None -> "" - | Some other -> - other) ^ - String.concat "" (List.map (fun s -> ";" ^ s) url.url_param) ^ - (match url.url_query with - None -> "" - | Some query -> - "?" ^ query) ^ - (match url.url_fragment with - None -> "" - | Some fragment -> - "#" ^ fragment) -;; - - -let url_of_string url_syntax s = - let l = String.length s in - let recognized x = x <> Url_part_not_recognized in - - let rec collect_words terminators eof_char cats k = - (* Collect words as recognized by 'cats', starting at position 'k' in - * 's'. Collection stops if one the characters listed in 'terminators' - * is found. If the end of the string is reached, it is treated as - * 'eof_char'. - *) - let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in - (* or raise Malformed_URL *) - let word, sep = - String.sub s k (k'-k), (if k' None, 0 - else - None, 0 - in - - (* If there is a "//", a host will follow: *) - let host, port, user, password, k2 = - if recognized url_syntax.url_enable_host && - k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin - - let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2) - in - (* or raise Malformed_URL *) - - let int x = - try int_of_string x with _ -> raise Malformed_URL in - - match word_sep_list with - [ host, ('/'|'#') ] -> - Some host, None, None, None, k' - | [ host, ':'; port, ('/'|'#') ] -> - Some host, Some (int port), None, None, k' - | [ user, '@'; host, ('/'|'#') ] -> - Some host, None, Some user, None, k' - | [ user, '@'; host, ':'; port, ('/'|'#') ] -> - Some host, Some (int port), Some user, None, k' - | [ user, ':'; password, '@'; host, ('/'|'#') ] -> - Some host, None, Some user, Some password, k' - | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] -> - Some host, Some (int port), Some user, Some password, k' - | _ -> - raise Malformed_URL - end - else - None, None, None, None, k1 - in - - let path, k3 = - if recognized url_syntax.url_enable_path && - k2 < l (* && s.[k2]='/' *) - then begin - let cats = path_cats_from_syntax url_syntax [ '/' ] in - let seps = separators_from_syntax url_syntax in - - (* Note: '>' is not allowed within URLs; because of this we can use - * it as end-of-string character. - *) - - let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in - (* or raise Malformed_URL *) - match word_sep_list with - [ "", '/'; "", _ ] -> - [ "" ], k' - | [ "", _ ] -> - [], k' - | _ -> - List.map fst word_sep_list, k' - end - else begin - (* If there is a single '/': skip it *) - if not (recognized url_syntax.url_enable_other) && - k2 < l && s.[k2]='/' - then - [], (k2+1) - else - [], k2 - end - in - - let other, k4 = - if recognized url_syntax.url_enable_other && - k3 < l - then begin - - let cats = other_cats_from_syntax url_syntax in - - (* Note: '>' is not allowed within URLs; because of this we can use - * it as end-of-string character. - *) - - let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in - (* or raise Malformed_URL *) - - match word_sep_list with - [ other, _ ] -> Some other, k' - | _ -> assert false - end - else - None, k3 - in - - let param, k5 = - if recognized url_syntax.url_enable_param && - k4 < l && s.[k4]=';' - then begin - let cats = path_cats_from_syntax url_syntax [] in - let seps = separators_from_syntax url_syntax in - let seps' = List.filter (fun c -> c <> ';') seps in - - (* Note: '>' is not allowed within URLs; because of this we can use - * it as end-of-string character. - *) - - let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in - (* or raise Malformed_URL *) - - List.map fst word_sep_list, k' - end - else - [], k4 - in - - let query, k6 = - if recognized url_syntax.url_enable_query && - k5 < l && s.[k5]='?' - then begin - let cats = path_cats_from_syntax url_syntax [] in - let seps = separators_from_syntax url_syntax in - - (* Note: '>' is not allowed within URLs; because of this we can use - * it as end-of-string character. - *) - - let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in - (* or raise Malformed_URL *) - - match word_sep_list with - [ query, _ ] -> Some query, k' - | _ -> assert false - end - else - None, k5 - in - - let fragment, k7 = - if recognized url_syntax.url_enable_fragment && - k6 < l && s.[k6]='#' - then begin - let cats = path_cats_from_syntax url_syntax [] in - let seps = separators_from_syntax url_syntax in - - (* Note: '>' is not allowed within URLs; because of this we can use - * it as end-of-string character. - *) - - let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in - (* or raise Malformed_URL *) - - match word_sep_list with - [ fragment, _ ] -> Some fragment, k' - | _ -> assert false - end - else - None, k6 - in - - if k7 <> l then raise Malformed_URL; - - make_url - ~encoded:true - ?scheme:scheme - ?user:user - ?password:password - ?host:host - ?port:port - ~path:path - ~param:param - ?query:query - ?fragment:fragment - ?other:other - url_syntax -;; - - -let split_path s = - let l = String.length s in - let rec collect_words k = - let k' = - try - String.index_from s k '/' - with - Not_found -> l - in - let word = String.sub s k (k'-k) in - if k' >= l then - [word] - else - word :: collect_words (k'+1) - in - match collect_words 0 with - [ "" ] -> [] - | [ "";"" ] -> [ "" ] - | other -> other -;; - - -let join_path l = - match l with - [ "" ] -> "/" - | _ -> String.concat "/" l;; - - -let norm_path l = - - let rec remove_slash_slash l first = - match l with - | [ "" ] -> - [ "" ] - | [ ""; "" ] when first -> - [ "" ] - | "" :: l' when not first -> - remove_slash_slash l' false - | x :: l' -> - x :: remove_slash_slash l' false - | [] -> - [] - in - - let rec remove_dot l first = - match l with - | ([ "." ] | ["."; ""]) -> - if first then [] else [ "" ] - | "." :: x :: l' -> - remove_dot (x :: l') false - | x :: l' -> - x :: remove_dot l' false - | [] -> - [] - in - - let rec remove_dot_dot_once l first = - match l with - x :: ".." :: [] when x <> "" && x <> ".." && not first -> - [ "" ] - | x :: ".." :: l' when x <> "" && x <> ".." -> - l' - | x :: l' -> - x :: remove_dot_dot_once l' false - | [] -> - raise Not_found - in - - let rec remove_dot_dot l = - try - let l' = remove_dot_dot_once l true in - remove_dot_dot l' - with - Not_found -> l - in - - let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in - match l' with - [".."] -> [".."; ""] - | ["";""] -> [ "" ] - | _ -> l' -;; - - -let apply_relative_url baseurl relurl = - if not (baseurl.url_validity) or not (relurl.url_validity) then - failwith "Neturl.apply_relative_url: URL not flagged as valid"; - - if relurl.url_scheme <> None then - modify_url - ~syntax:baseurl.url_syntax (* inherit syntax *) - relurl - else - if relurl.url_host <> None then - modify_url - ~syntax:baseurl.url_syntax (* inherit syntax and scheme *) - ?scheme:baseurl.url_scheme - relurl - else - match relurl.url_path with - "" :: other -> - (* An absolute path *) - modify_url - ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *) - ~encoded:true - ?scheme:baseurl.url_scheme (* login info *) - ?host:baseurl.url_host - ?port:baseurl.url_port - ?user:baseurl.url_user - ?password:baseurl.url_password - relurl - | [] -> - (* Empty: Inherit also path, params, query, and fragment *) - let new_params, new_query, new_fragment = - match relurl.url_param, relurl.url_query, relurl.url_fragment - with - [], None, None -> - (* Inherit all three *) - baseurl.url_param, baseurl.url_query, baseurl.url_fragment - | [], None, f -> - (* Inherit params and query *) - baseurl.url_param, baseurl.url_query, f - | [], q, f -> - (* Inherit params *) - baseurl.url_param, q, f - | p, q, f -> - (* Inherit none of them *) - p, q, f - in - modify_url - ~syntax:baseurl.url_syntax - ~encoded:true - ?scheme:baseurl.url_scheme - ?host:baseurl.url_host - ?port:baseurl.url_port - ?user:baseurl.url_user - ?password:baseurl.url_password - ~path:baseurl.url_path - ~param:new_params - ?query:new_query - ?fragment:new_fragment - relurl - | relpath -> - (* A relative path *) - let rec change_path basepath = - match basepath with - | [] -> - relpath - | [ x ] -> - relpath - | x :: basepath' -> - x :: change_path basepath' - in - let new_path = norm_path (change_path baseurl.url_path) in - modify_url - ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *) - ~encoded:true - ?scheme:baseurl.url_scheme (* login info *) - ?host:baseurl.url_host - ?port:baseurl.url_port - ?user:baseurl.url_user - ?password:baseurl.url_password - ~path:new_path (* and change path *) - relurl - -;; - - -let print_url url = - Format.print_string ("") -;; - - -(* ====================================================================== - * History: - * - * $Log$ - * Revision 1.1 2000/11/17 09:57:28 lpadovan - * Initial revision - * - * Revision 1.4 2000/07/04 21:50:51 gerd - * Fixed typo. - * - * Revision 1.3 2000/06/26 22:57:49 gerd - * Change: The record 'url_syntax' has an additional component - * 'url_accepts_8bits'. Setting this option to 'true' causes that - * the bytes >= 0x80 are no longer rejected. - * - * Revision 1.2 2000/06/25 19:39:48 gerd - * Lots of Bugfixes. - * - * Revision 1.1 2000/06/24 20:19:59 gerd - * Initial revision. - * - * - *)