2 * ----------------------------------------------------------------------
6 exception Malformed_URL
8 type url_syntax_option =
9 Url_part_not_recognized
15 { url_enable_scheme : url_syntax_option;
16 url_enable_user : url_syntax_option;
17 url_enable_password : url_syntax_option;
18 url_enable_host : url_syntax_option;
19 url_enable_port : url_syntax_option;
20 url_enable_path : url_syntax_option;
21 url_enable_param : url_syntax_option;
22 url_enable_query : url_syntax_option;
23 url_enable_fragment : url_syntax_option;
24 url_enable_other : url_syntax_option;
25 url_accepts_8bits : bool;
26 url_is_valid : url -> bool;
31 url_syntax : url_syntax;
32 mutable url_validity : bool;
33 url_scheme : string option;
34 url_user : string option;
35 url_password : string option;
36 url_host : string option;
37 url_port : int option;
38 url_path : string list;
39 url_param : string list;
40 url_query : string option;
41 url_fragment : string option;
42 url_other : string option;
54 let scan_url_part s k_from k_to cats accept_8bits =
55 (* Scans the longest word of accepted characters from position 'k_from'
56 * in 's' until at most position 'k_to'. The character following the
57 * word (if any) must be a separator character.
58 * On success, the function returns the position of the last character
60 * If there is any rejected character before the separator or the end
61 * of the string (i.e. position 'k_to') is reached, the exception
62 * Malformed_URL is raised.
63 * Furthermore, if the character '%' is accepted it is checked whether
64 * two hexadecimal digits follow (which must be accepted, too). If this
65 * is not true, the exception Malformed_URL is raised, too.
66 * 'cats': contains for every character code (0 to 255) the category
70 if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
72 ('0'..'9'|'A'..'F'|'a'..'f') -> ()
73 | _ -> raise Malformed_URL
81 let cat = cats.(Char.code c) in
85 if k+2 >= k_to then raise Malformed_URL;
96 if accept_8bits && c >= '\128'
98 else raise Malformed_URL
102 assert (Array.length cats = 256);
103 assert (k_from >= 0);
104 assert (k_from <= k_to);
105 assert (k_to <= String.length s);
111 (* Create a categorization: *)
113 let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
114 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]
116 let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
117 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]
119 let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]
121 let safe = [ '$'; '-'; '_'; '.'; '+' ]
123 let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]
125 let make_cats accepted separators =
126 (* create a categorization:
127 * - All characters listed in 'separators' are separators.
128 * - All characters listed in 'accepted' and which do not occur in
129 * 'separators' are accepted characters.
130 * - All other characters are rejected.
132 let cats = Array.make 256 Rejected in
135 cats.(Char.code c) <- Accepted
141 cats.(Char.code c) <- Separator
149 make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;
151 (* scheme_cats: character categorization to _extract_ the URL scheme *)
156 (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])
157 [':'; '@'; '/'; '#' ]
160 (* login_cats: character categorization to _extract_ user name, password,
161 * host name, and port.
166 (lalpha @ ualpha @ digit @ ['.'; '-'])
170 (* host_cats: character categorization to _check_ whether the host name
171 * is formed only by legal characters.
172 * Especially '%' is not allowed here!
181 (* port_cats: character categorization to _check_ whether the port number
182 * is formed only by legal characters.
183 * Especially '%' is not allowed here!
186 let path_cats separators =
188 (lalpha @ ualpha @ digit @ safe @ extra @
189 ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
194 let separators_from_syntax syn =
195 let include_if syn_option clist =
196 if syn_option <> Url_part_not_recognized then
201 (include_if syn.url_enable_param [';']) @
202 (include_if syn.url_enable_query ['?']) @
203 (include_if syn.url_enable_fragment ['#'])
207 let path_cats_from_syntax syn extraseps =
208 let separators = separators_from_syntax syn in
209 path_cats (separators @ extraseps)
212 (* path_cats_from_syntax:
213 * Computes a character categorization to extract the path from an URL.
214 * This depends on the syntax because the list of possible separators
215 * contains the characters that may begin the next URL clause.
218 * - The '#' is rejected unless fragments are enabled.
219 * - The '~' is accepted although this violates RFC 1738.
223 let other_cats_from_syntax syn =
224 let include_if syn_option clist =
225 if syn_option <> Url_part_not_recognized then
231 (include_if syn.url_enable_param [';']) @
232 (include_if syn.url_enable_query ['?']) @
233 (include_if syn.url_enable_fragment ['#'])
237 (lalpha @ ualpha @ digit @ safe @ extra @
238 (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
242 (* other_cats: character categorization to extract or check the
243 * "other" part of the URL.
248 let extract_url_scheme s =
249 let l = String.length s in
250 let k = scan_url_part s 0 l scheme_cats false in
251 (* or raise Malformed_URL *)
252 if k = l then raise Malformed_URL;
253 assert (s.[k] = ':');
254 String.lowercase(String.sub s 0 k)
258 let ( => ) a b = not a or b;; (* implication *)
260 let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *)
262 let url_syntax_is_valid syn =
263 let recognized x = x <> Url_part_not_recognized in
264 let not_recognized x = x = Url_part_not_recognized in
265 (recognized syn.url_enable_password => recognized syn.url_enable_user) &
266 (recognized syn.url_enable_port => recognized syn.url_enable_host) &
267 (recognized syn.url_enable_user => recognized syn.url_enable_host) &
268 not ( (recognized syn.url_enable_user ||
269 recognized syn.url_enable_password ||
270 recognized syn.url_enable_host ||
271 recognized syn.url_enable_port ||
272 recognized syn.url_enable_path) &&
273 (recognized syn.url_enable_other))
277 let partial_url_syntax syn =
280 Url_part_not_recognized -> Url_part_not_recognized
281 | Url_part_allowed -> Url_part_allowed
282 | Url_part_required -> Url_part_allowed
284 { url_enable_scheme = weaken syn.url_enable_scheme;
285 url_enable_user = weaken syn.url_enable_user;
286 url_enable_password = weaken syn.url_enable_password;
287 url_enable_host = weaken syn.url_enable_host;
288 url_enable_port = weaken syn.url_enable_port;
289 url_enable_path = weaken syn.url_enable_path;
290 url_enable_param = weaken syn.url_enable_param;
291 url_enable_query = weaken syn.url_enable_query;
292 url_enable_fragment = weaken syn.url_enable_fragment;
293 url_enable_other = weaken syn.url_enable_other;
294 url_accepts_8bits = syn.url_accepts_8bits;
295 url_is_valid = syn.url_is_valid;
301 let file_url_syntax =
302 { url_enable_scheme = Url_part_required;
303 url_enable_user = Url_part_not_recognized;
304 url_enable_password = Url_part_not_recognized;
305 url_enable_host = Url_part_allowed;
306 url_enable_port = Url_part_not_recognized;
307 url_enable_path = Url_part_required;
308 url_enable_param = Url_part_not_recognized;
309 url_enable_query = Url_part_not_recognized;
310 url_enable_fragment = Url_part_not_recognized;
311 url_enable_other = Url_part_not_recognized;
312 url_accepts_8bits = false;
313 url_is_valid = (fun _ -> true);
319 { url_enable_scheme = Url_part_required;
320 url_enable_user = Url_part_allowed;
321 url_enable_password = Url_part_allowed;
322 url_enable_host = Url_part_required;
323 url_enable_port = Url_part_allowed;
324 url_enable_path = Url_part_allowed;
325 url_enable_param = Url_part_allowed;
326 url_enable_query = Url_part_not_recognized;
327 url_enable_fragment = Url_part_not_recognized;
328 url_enable_other = Url_part_not_recognized;
329 url_accepts_8bits = false;
330 url_is_valid = (fun _ -> true);
335 let http_url_syntax =
336 { url_enable_scheme = Url_part_required;
337 url_enable_user = Url_part_allowed;
338 url_enable_password = Url_part_allowed;
339 url_enable_host = Url_part_required;
340 url_enable_port = Url_part_allowed;
341 url_enable_path = Url_part_allowed;
342 url_enable_param = Url_part_not_recognized;
343 url_enable_query = Url_part_allowed;
344 url_enable_fragment = Url_part_not_recognized;
345 url_enable_other = Url_part_not_recognized;
346 url_accepts_8bits = false;
347 url_is_valid = (fun _ -> true);
352 let mailto_url_syntax =
353 { url_enable_scheme = Url_part_required;
354 url_enable_user = Url_part_not_recognized;
355 url_enable_password = Url_part_not_recognized;
356 url_enable_host = Url_part_not_recognized;
357 url_enable_port = Url_part_not_recognized;
358 url_enable_path = Url_part_not_recognized;
359 url_enable_param = Url_part_not_recognized;
360 url_enable_query = Url_part_not_recognized;
361 url_enable_fragment = Url_part_not_recognized;
362 url_enable_other = Url_part_required;
363 url_accepts_8bits = false;
364 url_is_valid = (fun _ -> true);
369 let null_url_syntax =
370 { url_enable_scheme = Url_part_not_recognized;
371 url_enable_user = Url_part_not_recognized;
372 url_enable_password = Url_part_not_recognized;
373 url_enable_host = Url_part_not_recognized;
374 url_enable_port = Url_part_not_recognized;
375 url_enable_path = Url_part_not_recognized;
376 url_enable_param = Url_part_not_recognized;
377 url_enable_query = Url_part_not_recognized;
378 url_enable_fragment = Url_part_not_recognized;
379 url_enable_other = Url_part_not_recognized;
380 url_accepts_8bits = false;
381 url_is_valid = (fun _ -> true);
387 { url_enable_scheme = Url_part_allowed;
388 url_enable_user = Url_part_allowed;
389 url_enable_password = Url_part_allowed;
390 url_enable_host = Url_part_allowed;
391 url_enable_port = Url_part_allowed;
392 url_enable_path = Url_part_allowed;
393 url_enable_param = Url_part_allowed;
394 url_enable_query = Url_part_allowed;
395 url_enable_fragment = Url_part_allowed;
396 url_enable_other = Url_part_not_recognized;
397 url_accepts_8bits = false;
398 url_is_valid = (fun _ -> true);
403 let common_url_syntax =
404 let h = Hashtbl.create 10 in
405 Hashtbl.add h "file" file_url_syntax;
406 Hashtbl.add h "ftp" ftp_url_syntax;
407 Hashtbl.add h "http" http_url_syntax;
408 Hashtbl.add h "mailto" mailto_url_syntax;
413 let url_conforms_to_syntax url =
414 let recognized x = x <> Url_part_not_recognized in
415 let required x = x = Url_part_required in
416 let present x = x <> None in
417 let syn = url.url_syntax in
418 (present url.url_scheme => recognized syn.url_enable_scheme) &
419 (present url.url_user => recognized syn.url_enable_user) &
420 (present url.url_password => recognized syn.url_enable_password) &
421 (present url.url_host => recognized syn.url_enable_host) &
422 (present url.url_port => recognized syn.url_enable_port) &
423 ((url.url_path <> []) => recognized syn.url_enable_path) &
424 ((url.url_param <> []) => recognized syn.url_enable_param) &
425 (present url.url_query => recognized syn.url_enable_query) &
426 (present url.url_fragment => recognized syn.url_enable_fragment) &
427 (present url.url_other => recognized syn.url_enable_other) &
428 (required syn.url_enable_scheme => present url.url_scheme) &
429 (required syn.url_enable_user => present url.url_user) &
430 (required syn.url_enable_password => present url.url_password) &
431 (required syn.url_enable_host => present url.url_host) &
432 (required syn.url_enable_port => present url.url_port) &
433 (required syn.url_enable_path => (url.url_path <> [])) &
434 (required syn.url_enable_param => (url.url_param <> [])) &
435 (required syn.url_enable_query => present url.url_query) &
436 (required syn.url_enable_fragment => present url.url_fragment) &
437 (required syn.url_enable_other => present url.url_other) &
438 (url.url_validity or syn.url_is_valid url)
442 let url_syntax_of_url url = url.url_syntax
462 let encode = Netencoding.Url.encode in
469 | Some x' -> Some (encode x')
480 None -> url.url_syntax
484 let check_string s_opt cats =
488 let l = String.length s in
489 let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
490 (* or raise Malformed_URL *)
491 if k <> l then raise Malformed_URL
494 let check_string_list p cats sep =
497 let l = String.length p_component in
499 scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
500 (* or raise Malformed_URL *)
501 if k <> l then raise Malformed_URL;
502 if String.contains p_component sep then raise Malformed_URL;
507 (* Create the modified record: *)
510 url_syntax = new_syntax;
511 url_validity = false;
512 url_scheme = if scheme = None then url.url_scheme else scheme;
513 url_user = if user = None then url.url_user else enc user;
514 url_password = if password = None then url.url_password else enc password;
515 url_host = if host = None then url.url_host else host;
516 url_port = if port = None then url.url_port else port;
517 url_path = (match path with
519 | Some p -> enc_list p);
520 url_param = (match param with
521 None -> url.url_param
522 | Some p -> enc_list p);
523 url_query = if query = None then url.url_query else enc query;
524 url_fragment = if fragment = None then url.url_fragment else enc fragment;
525 url_other = if other = None then url.url_other else enc other;
528 (* Check whether the URL conforms to the syntax:
530 if not (url_conforms_to_syntax url') then raise Malformed_URL;
531 if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
532 if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
533 if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
534 (* Check every part: *)
535 check_string url'.url_scheme scheme_cats;
536 check_string url'.url_user login_cats;
537 check_string url'.url_password login_cats;
538 check_string url'.url_host host_cats;
539 (match url'.url_port with
541 | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
543 let path_cats = path_cats_from_syntax new_syntax [] in
544 let other_cats = other_cats_from_syntax new_syntax in
545 check_string url'.url_query path_cats;
546 check_string url'.url_fragment path_cats;
547 check_string url'.url_other other_cats;
548 (* Check the lists: *)
549 check_string_list url'.url_param path_cats ';';
550 check_string_list url'.url_path path_cats '/';
551 (* Further path checks: *)
552 begin match url'.url_path with
554 (* The path is empty: There must not be a 'param' or 'query' *)
555 if url'.url_host <> None then begin
556 if url'.url_param <> [] then raise Malformed_URL;
557 if url'.url_query <> None then raise Malformed_URL;
560 (* This is illegal. *)
563 (* The path is absolute: always ok *)
566 (* The path is relative: there must not be a host *)
567 if url'.url_host <> None then raise Malformed_URL;
569 begin match url'.url_path with
570 _ :: rest -> (* "//" ambiguity *)
571 begin match List.rev rest with
573 if List.exists (fun p -> p = "") rest' then
581 (* Cache that the URL is valid: *)
582 url'.url_validity <- true;
590 url_syntax = null_url_syntax;
621 if not (url_syntax_is_valid url_syntax) then
622 invalid_arg "Neturl.make_url";
657 ?scheme: (if scheme then None else url.url_scheme)
658 ?user: (if user then None else url.url_user)
659 ?password: (if password then None else url.url_password)
660 ?host: (if host then None else url.url_host)
661 ?port: (if port then None else url.url_port)
662 ?path: (if path then None else Some url.url_path)
663 ?param: (if param then None else Some url.url_param)
664 ?query: (if query then None else url.url_query)
665 ?fragment: (if fragment then None else url.url_fragment)
666 ?other: (if other then None else url.url_other)
686 let encode = Netencoding.Url.encode in
694 | Some x' -> Some (encode x')
704 let pass_if_missing current arg =
712 ?scheme: (pass_if_missing url.url_scheme scheme)
713 ?user: (pass_if_missing url.url_user (enc user))
714 ?password: (pass_if_missing url.url_password (enc password))
715 ?host: (pass_if_missing url.url_host host)
716 ?port: (pass_if_missing url.url_port port)
717 ~path: (if url.url_path = [] then enc_list path else url.url_path)
718 ~param: (if url.url_param = [] then enc_list param else url.url_param)
719 ?query: (pass_if_missing url.url_query (enc query))
720 ?fragment: (pass_if_missing url.url_fragment (enc fragment))
721 ?other: (pass_if_missing url.url_other (enc other))
740 let remove_if_matching current arg =
755 ?scheme: (remove_if_matching url.url_scheme scheme)
756 ?user: (remove_if_matching url.url_user user)
757 ?password: (remove_if_matching url.url_password password)
758 ?host: (remove_if_matching url.url_host host)
759 ?port: (remove_if_matching url.url_port port)
760 ~path: (match path with
763 if x = url.url_path then
767 ~param: (match param with
768 None -> url.url_param
770 if x = url.url_param then
774 ?query: (remove_if_matching url.url_query query)
775 ?fragment: (remove_if_matching url.url_fragment fragment)
776 ?other: (remove_if_matching url.url_other other)
795 (scheme => (url.url_scheme <> None)) &
796 (user => (url.url_user <> None)) &
797 (password => (url.url_password <> None)) &
798 (host => (url.url_host <> None)) &
799 (port => (url.url_port <> None)) &
800 (path => (url.url_path <> [])) &
801 (param => (url.url_param <> [])) &
802 (query => (url.url_query <> None)) &
803 (fragment => (url.url_fragment <> None)) &
804 (other => (url.url_other <> None))
808 let return_if value =
810 None -> raise Not_found
815 let decode_if want_encoded value =
816 let value' = return_if value in
820 Netencoding.Url.decode value' (* WARNING: not thread-safe! *)
824 let decode_path_if want_encoded value =
828 List.map Netencoding.Url.decode value (* WARNING: not thread-safe! *)
832 let url_scheme url = return_if url.url_scheme;;
833 let url_user ?(encoded=false) url = decode_if encoded url.url_user;;
834 let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
835 let url_host url = return_if url.url_host;;
836 let url_port url = return_if url.url_port;;
837 let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;;
838 let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;;
839 let url_query ?(encoded=false) url = decode_if encoded url.url_query;;
840 let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
841 let url_other ?(encoded=false) url = decode_if encoded url.url_other;;
844 let string_of_url url =
845 if not (url.url_validity) then
846 failwith "Neturl.string_of_url: URL not flagged as valid";
847 (match url.url_scheme with
849 | Some s -> s ^ ":") ^
850 (match url.url_host with
854 (match url.url_user with
858 (match url.url_password with
865 (match url.url_port with
868 ":" ^ string_of_int port)) ^
869 (match url.url_path with
872 | x :: p when url.url_scheme = None &&
873 url.url_host = None &&
874 String.contains x ':'
876 (* Really a special case: The colon contained in 'x' may cause
877 * that a prefix of 'x' is interpreted as URL scheme. In this
878 * case, "./" is prepended (as recommended in RFC 1808, 5.3).
884 String.concat "/" url.url_path ^
885 (match url.url_other with
889 String.concat "" (List.map (fun s -> ";" ^ s) url.url_param) ^
890 (match url.url_query with
894 (match url.url_fragment with
901 let url_of_string url_syntax s =
902 let l = String.length s in
903 let recognized x = x <> Url_part_not_recognized in
905 let rec collect_words terminators eof_char cats k =
906 (* Collect words as recognized by 'cats', starting at position 'k' in
907 * 's'. Collection stops if one the characters listed in 'terminators'
908 * is found. If the end of the string is reached, it is treated as
911 let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in
912 (* or raise Malformed_URL *)
914 String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
915 if List.mem sep terminators then
918 let word_sep_list', k'' =
919 collect_words terminators eof_char cats (k'+1) in
920 ((word, sep) :: word_sep_list'), k''
923 (* Try to extract the scheme name: *)
925 if recognized url_syntax.url_enable_scheme then
927 let k = scan_url_part s 0 l scheme_cats false in
928 (* or raise Malformed_URL *)
929 if k = l then raise Malformed_URL;
930 assert (s.[k] = ':');
931 Some (String.sub s 0 k), (k+1)
933 Malformed_URL -> None, 0
938 (* If there is a "//", a host will follow: *)
939 let host, port, user, password, k2 =
940 if recognized url_syntax.url_enable_host &&
941 k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin
943 let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2)
945 (* or raise Malformed_URL *)
948 try int_of_string x with _ -> raise Malformed_URL in
950 match word_sep_list with
951 [ host, ('/'|'#') ] ->
952 Some host, None, None, None, k'
953 | [ host, ':'; port, ('/'|'#') ] ->
954 Some host, Some (int port), None, None, k'
955 | [ user, '@'; host, ('/'|'#') ] ->
956 Some host, None, Some user, None, k'
957 | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
958 Some host, Some (int port), Some user, None, k'
959 | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
960 Some host, None, Some user, Some password, k'
961 | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
962 Some host, Some (int port), Some user, Some password, k'
967 None, None, None, None, k1
971 if recognized url_syntax.url_enable_path &&
972 k2 < l (* && s.[k2]='/' *)
974 let cats = path_cats_from_syntax url_syntax [ '/' ] in
975 let seps = separators_from_syntax url_syntax in
977 (* Note: '>' is not allowed within URLs; because of this we can use
978 * it as end-of-string character.
981 let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
982 (* or raise Malformed_URL *)
983 match word_sep_list with
984 [ "", '/'; "", _ ] ->
989 List.map fst word_sep_list, k'
992 (* If there is a single '/': skip it *)
993 if not (recognized url_syntax.url_enable_other) &&
1003 if recognized url_syntax.url_enable_other &&
1007 let cats = other_cats_from_syntax url_syntax in
1009 (* Note: '>' is not allowed within URLs; because of this we can use
1010 * it as end-of-string character.
1013 let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
1014 (* or raise Malformed_URL *)
1016 match word_sep_list with
1017 [ other, _ ] -> Some other, k'
1025 if recognized url_syntax.url_enable_param &&
1026 k4 < l && s.[k4]=';'
1028 let cats = path_cats_from_syntax url_syntax [] in
1029 let seps = separators_from_syntax url_syntax in
1030 let seps' = List.filter (fun c -> c <> ';') seps in
1032 (* Note: '>' is not allowed within URLs; because of this we can use
1033 * it as end-of-string character.
1036 let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
1037 (* or raise Malformed_URL *)
1039 List.map fst word_sep_list, k'
1046 if recognized url_syntax.url_enable_query &&
1047 k5 < l && s.[k5]='?'
1049 let cats = path_cats_from_syntax url_syntax [] in
1050 let seps = separators_from_syntax url_syntax in
1052 (* Note: '>' is not allowed within URLs; because of this we can use
1053 * it as end-of-string character.
1056 let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
1057 (* or raise Malformed_URL *)
1059 match word_sep_list with
1060 [ query, _ ] -> Some query, k'
1068 if recognized url_syntax.url_enable_fragment &&
1069 k6 < l && s.[k6]='#'
1071 let cats = path_cats_from_syntax url_syntax [] in
1072 let seps = separators_from_syntax url_syntax in
1074 (* Note: '>' is not allowed within URLs; because of this we can use
1075 * it as end-of-string character.
1078 let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
1079 (* or raise Malformed_URL *)
1081 match word_sep_list with
1082 [ fragment, _ ] -> Some fragment, k'
1089 if k7 <> l then raise Malformed_URL;
1108 let l = String.length s in
1109 let rec collect_words k =
1112 String.index_from s k '/'
1116 let word = String.sub s k (k'-k) in
1120 word :: collect_words (k'+1)
1122 match collect_words 0 with
1124 | [ "";"" ] -> [ "" ]
1132 | _ -> String.concat "/" l;;
1137 let rec remove_slash_slash l first =
1141 | [ ""; "" ] when first ->
1143 | "" :: l' when not first ->
1144 remove_slash_slash l' false
1146 x :: remove_slash_slash l' false
1151 let rec remove_dot l first =
1153 | ([ "." ] | ["."; ""]) ->
1154 if first then [] else [ "" ]
1156 remove_dot (x :: l') false
1158 x :: remove_dot l' false
1163 let rec remove_dot_dot_once l first =
1165 x :: ".." :: [] when x <> "" && x <> ".." && not first ->
1167 | x :: ".." :: l' when x <> "" && x <> ".." ->
1170 x :: remove_dot_dot_once l' false
1175 let rec remove_dot_dot l =
1177 let l' = remove_dot_dot_once l true in
1183 let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
1185 [".."] -> [".."; ""]
1191 let apply_relative_url baseurl relurl =
1192 if not (baseurl.url_validity) or not (relurl.url_validity) then
1193 failwith "Neturl.apply_relative_url: URL not flagged as valid";
1195 if relurl.url_scheme <> None then
1197 ~syntax:baseurl.url_syntax (* inherit syntax *)
1200 if relurl.url_host <> None then
1202 ~syntax:baseurl.url_syntax (* inherit syntax and scheme *)
1203 ?scheme:baseurl.url_scheme
1206 match relurl.url_path with
1208 (* An absolute path *)
1210 ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
1212 ?scheme:baseurl.url_scheme (* login info *)
1213 ?host:baseurl.url_host
1214 ?port:baseurl.url_port
1215 ?user:baseurl.url_user
1216 ?password:baseurl.url_password
1219 (* Empty: Inherit also path, params, query, and fragment *)
1220 let new_params, new_query, new_fragment =
1221 match relurl.url_param, relurl.url_query, relurl.url_fragment
1224 (* Inherit all three *)
1225 baseurl.url_param, baseurl.url_query, baseurl.url_fragment
1227 (* Inherit params and query *)
1228 baseurl.url_param, baseurl.url_query, f
1230 (* Inherit params *)
1231 baseurl.url_param, q, f
1233 (* Inherit none of them *)
1237 ~syntax:baseurl.url_syntax
1239 ?scheme:baseurl.url_scheme
1240 ?host:baseurl.url_host
1241 ?port:baseurl.url_port
1242 ?user:baseurl.url_user
1243 ?password:baseurl.url_password
1244 ~path:baseurl.url_path
1247 ?fragment:new_fragment
1250 (* A relative path *)
1251 let rec change_path basepath =
1258 x :: change_path basepath'
1260 let new_path = norm_path (change_path baseurl.url_path) in
1262 ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
1264 ?scheme:baseurl.url_scheme (* login info *)
1265 ?host:baseurl.url_host
1266 ?port:baseurl.url_port
1267 ?user:baseurl.url_user
1268 ?password:baseurl.url_password
1269 ~path:new_path (* and change path *)
1276 Format.print_string ("<URL:" ^ string_of_url url ^ ">")
1280 (* ======================================================================
1284 * Revision 1.1 2000/11/17 09:57:28 lpadovan
1287 * Revision 1.4 2000/07/04 21:50:51 gerd
1290 * Revision 1.3 2000/06/26 22:57:49 gerd
1291 * Change: The record 'url_syntax' has an additional component
1292 * 'url_accepts_8bits'. Setting this option to 'true' causes that
1293 * the bytes >= 0x80 are no longer rejected.
1295 * Revision 1.2 2000/06/25 19:39:48 gerd
1298 * Revision 1.1 2000/06/24 20:19:59 gerd