2 #load "netstring.cma";;
7 let expect_malformed_url f =
8 try ignore(f()); false with Malformed_URL -> true;;
11 not (expect_malformed_url f)
14 (**********************************************************************)
15 (* extract_url_scheme *)
16 (**********************************************************************)
19 extract_url_scheme "a:bc" = "a" &&
20 extract_url_scheme "A:bc" = "a" &&
21 extract_url_scheme "a:b:c" = "a" &&
22 extract_url_scheme "a+b-c:d:e" = "a+b-c"
28 try ignore(extract_url_scheme s); false with Malformed_URL -> true
36 (**********************************************************************)
38 (**********************************************************************)
40 let hashtbl_for_all f h =
43 (fun k v -> b := !b && f k v)
49 url_syntax_is_valid null_url_syntax &&
50 url_syntax_is_valid ip_url_syntax &&
53 url_syntax_is_valid syn
59 url_syntax_is_valid (partial_url_syntax null_url_syntax) &&
60 url_syntax_is_valid (partial_url_syntax ip_url_syntax) &&
63 url_syntax_is_valid (partial_url_syntax syn)
69 let f = fun _ -> true in
71 { url_enable_scheme = Url_part_not_recognized;
72 url_enable_user = Url_part_required;
73 url_enable_password = Url_part_allowed;
74 url_enable_host = Url_part_required;
75 url_enable_port = Url_part_not_recognized;
76 url_enable_path = Url_part_required;
77 url_enable_param = Url_part_not_recognized;
78 url_enable_query = Url_part_not_recognized;
79 url_enable_fragment = Url_part_required;
80 url_enable_other = Url_part_not_recognized;
81 url_accepts_8bits = false;
84 let syn' = partial_url_syntax syn in
86 (syn'.url_enable_scheme = Url_part_not_recognized) &&
87 (syn'.url_enable_user = Url_part_allowed) &&
88 (syn'.url_enable_password = Url_part_allowed) &&
89 (syn'.url_enable_host = Url_part_allowed) &&
90 (syn'.url_enable_port = Url_part_not_recognized) &&
91 (syn'.url_enable_path = Url_part_allowed) &&
92 (syn'.url_enable_param = Url_part_not_recognized) &&
93 (syn'.url_enable_query = Url_part_not_recognized) &&
94 (syn'.url_enable_fragment = Url_part_allowed) &&
95 (syn'.url_enable_other = Url_part_not_recognized) &&
96 (syn'.url_is_valid == f) &&
98 url_syntax_is_valid syn &&
99 url_syntax_is_valid syn'
102 (**********************************************************************)
104 (**********************************************************************)
107 (* Basic functionality: *)
108 let http_syn = Hashtbl.find common_url_syntax "http" in
111 (* default: not encoded *)
121 ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
126 ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
129 (url_syntax_of_url u1 == http_syn) &&
131 (url_scheme u1 = "http") &&
132 (url_user u1 = "U") &&
133 (url_password u1 = "%()~$@") &&
134 (url_host u1 = "a.b.c") &&
135 (url_port u1 = 81) &&
136 (url_path u1 = ["";"?";""]) &&
138 (url_user ~encoded:true u1 = "U") &&
139 (url_password ~encoded:true u1 = "%25()%7E$%40") &&
140 (url_path ~encoded:true u1 = ["";"%3F";""]) &&
142 string_of_url u1 = "http://U:%25()%7E$%40@a.b.c:81/%3F/"
147 (* Basic functionality: *)
148 let http_syn = Hashtbl.find common_url_syntax "http" in
154 ~password:"%25()%7e$%40"
161 ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
166 ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
169 (url_syntax_of_url u1 == http_syn) &&
171 (url_scheme u1 = "http") &&
172 (url_user u1 = "U") &&
173 (url_password u1 = "%()~$@") &&
174 (url_host u1 = "a.b.c") &&
175 (url_port u1 = 81) &&
176 (url_path u1 = ["";"?";""]) &&
178 (url_user ~encoded:true u1 = "%55") &&
179 (url_password ~encoded:true u1 = "%25()%7e$%40") &&
180 (url_path ~encoded:true u1 = ["";"%3F";""]) &&
182 string_of_url u1 = "http://%55:%25()%7e$%40@a.b.c:81/%3F/"
189 (* It is not possible to add a component which is not recognized *)
190 let http_syn = Hashtbl.find common_url_syntax "http" in
207 (* It is not possible to put malformed '%'-encodings into the URL *)
208 let http_syn = Hashtbl.find common_url_syntax "http" in
210 works (* reference *)
260 (* It is not possible to put unsafe characters into the URL *)
261 let http_syn = Hashtbl.find common_url_syntax "http" in
268 ~password:(String.make 1 c)
275 works (fun () -> make 'a') && (* reference *)
277 (* List of unsafe characters taken from RFC1738: *)
278 expect_malformed_url (fun () -> make '<') &&
279 expect_malformed_url (fun () -> make '>') &&
280 expect_malformed_url (fun () -> make '"') &&
281 expect_malformed_url (fun () -> make '#') &&
282 (* Note: '#' would be considered as reserved if fragments were enabled *)
283 expect_malformed_url (fun () -> make '%') &&
284 expect_malformed_url (fun () -> make '{') &&
285 expect_malformed_url (fun () -> make '}') &&
286 expect_malformed_url (fun () -> make '|') &&
287 expect_malformed_url (fun () -> make '\\') &&
288 expect_malformed_url (fun () -> make '^') &&
289 expect_malformed_url (fun () -> make '[') &&
290 expect_malformed_url (fun () -> make ']') &&
291 expect_malformed_url (fun () -> make '`') &&
292 expect_malformed_url (fun () -> make '~') &&
293 (* Note: '~' is considered as safe in paths: *)
308 (* It is not possible to put reserved characters into the URL *)
309 let http_syn = Hashtbl.find common_url_syntax "http" in
311 let make_password c =
316 ~password:(String.make 1 c)
330 ~path:["";String.make 1 c;""]
342 ~query:(String.make 1 c)
346 (* Note: There is a difference between RFC 1738 and RFC 1808 regarding
347 * which characters are reserved. RFC 1808 defines a fixed set of characters
348 * as reserved while RFC 1738 defines the reserved characters depending
350 * This implementation of URLs follows RFC 1738 (because of practical
354 works (fun () -> make_password 'a') && (* reference *)
355 works (fun () -> make_path 'a') &&
356 works (fun () -> make_query 'a') &&
358 expect_malformed_url (fun () -> make_password ':') &&
359 expect_malformed_url (fun () -> make_password '@') &&
360 expect_malformed_url (fun () -> make_password '/') &&
361 works (fun () -> make_password ';') &&
362 works (fun () -> make_password '?') &&
363 works (fun () -> make_password '=') &&
364 works (fun () -> make_password '&') &&
366 (* Note: ';' is allowed in path and query because parameters are not
367 * recognized in HTTP syntax.
370 expect_malformed_url (fun () -> make_path '/') &&
371 expect_malformed_url (fun () -> make_path '?') &&
372 works (fun () -> make_path ':') &&
373 works (fun () -> make_path '@') &&
374 works (fun () -> make_path ';') &&
375 works (fun () -> make_path '=') &&
376 works (fun () -> make_path '&') &&
378 expect_malformed_url (fun () -> make_query '?') &&
379 works (fun () -> make_query '/') &&
380 works (fun () -> make_query ':') &&
381 works (fun () -> make_query '@') &&
382 works (fun () -> make_query ';') &&
383 works (fun () -> make_query '=') &&
384 works (fun () -> make_query '&')
389 (* It is not possible to create a URL with a password, but without user;
390 * and neither to create a URL with a port, but without host;
391 * and neither to create a URL with a user, but without host
422 (* It is not possible to create a URL with illegal scheme prefix *)
458 (* It is not possible to have a path with double slashes *)
494 (* It is not possible to have port numbers outside 0..65535 *)
521 (* Several cases which are not allowed. *)
529 ) && (* illegal: host + relative path *)
538 ) && (* illegal: host + no path + params *)
547 ) (* illegal: host + no path + query *)
550 (**********************************************************************)
552 (**********************************************************************)
555 (* absolute URLs with ip_url_syntax *)
557 string_of_url (url_of_string ip_url_syntax s) = s in
560 try ignore(url_of_string ip_url_syntax s); false
561 with Malformed_URL -> true
566 identical "http://host" &&
567 identical "http://user@host" &&
568 identical "http://user:password@host" &&
569 identical "http://user@host:99" &&
570 identical "http://user:password@host:99" &&
572 identical "http://host/" &&
573 identical "http://user@host/" &&
574 identical "http://user:password@host/" &&
575 identical "http://user@host:99/" &&
576 identical "http://user:password@host:99/" &&
578 identical "http://host/a/b" &&
579 identical "http://user@host/a/b" &&
580 identical "http://user:password@host/a/b" &&
581 identical "http://user@host:99/a/b" &&
582 identical "http://user:password@host:99/a/b" &&
584 identical "http://host/a/b/" &&
585 identical "http://user@host/a/b/" &&
586 identical "http://user:password@host/a/b/" &&
587 identical "http://user@host:99/a/b/" &&
588 identical "http://user:password@host:99/a/b/" &&
590 identical "http://host/?a=b&c=d" &&
591 identical "http://user@host/?a=b&c=d" &&
592 identical "http://user:password@host/?a=b&c=d" &&
593 identical "http://user@host:99/?a=b&c=d" &&
594 identical "http://user:password@host:99/?a=b&c=d" &&
596 fails "http://host?a=b&c=d" &&
597 fails "http://user@host?a=b&c=d" &&
598 fails "http://user:password@host?a=b&c=d" &&
599 fails "http://user@host:99?a=b&c=d" &&
600 fails "http://user:password@host:99?a=b&c=d" &&
602 identical "http://host/?a=/&c=/" &&
603 identical "http://user@host/?a=/&c=/" &&
604 identical "http://user:password@host/?a=/&c=/" &&
605 identical "http://user@host:99/?a=/&c=/" &&
606 identical "http://user:password@host:99/?a=/&c=/" &&
608 identical "http://host/;a;b" &&
609 identical "http://user@host/;a;b" &&
610 identical "http://user:password@host/;a;b" &&
611 identical "http://user@host:99/;a;b" &&
612 identical "http://user:password@host:99/;a;b" &&
614 fails "http://host;a;b" &&
615 fails "http://user@host;a;b" &&
616 fails "http://user:password@host;a;b" &&
617 fails "http://user@host:99;a;b" &&
618 fails "http://user:password@host:99;a;b" &&
620 identical "http://host/;a;b?a=b&c=d" &&
621 identical "http://user@host/;a;b?a=b&c=d" &&
622 identical "http://user:password@host/;a;b?a=b&c=d" &&
623 identical "http://user@host:99/;a;b?a=b&c=d" &&
624 identical "http://user:password@host:99/;a;b?a=b&c=d" &&
626 identical "http:#f" &&
628 identical "http://host#f" &&
629 identical "http://user@host#f" &&
630 identical "http://user:password@host#f" &&
631 identical "http://user@host:99#f" &&
632 identical "http://user:password@host:99#f" &&
634 identical "http://host/;a;b?a=b&c=d#f" &&
635 identical "http://user@host/;a;b?a=b&c=d#f" &&
636 identical "http://user:password@host/;a;b?a=b&c=d#f" &&
637 identical "http://user@host:99/;a;b?a=b&c=d#f" &&
638 identical "http://user:password@host:99/;a;b?a=b&c=d#f" &&
645 (* relative URLs with ip_url_syntax *)
647 string_of_url (url_of_string ip_url_syntax s) = s in
650 try ignore(url_of_string ip_url_syntax s); false
651 with Malformed_URL -> true
654 identical "//host" &&
655 identical "//user@host" &&
656 identical "//user:password@host" &&
657 identical "//user@host:99" &&
658 identical "//user:password@host:99" &&
660 identical "//host/" &&
661 identical "//user@host/" &&
662 identical "//user:password@host/" &&
663 identical "//user@host:99/" &&
664 identical "//user:password@host:99/" &&
666 identical "//host#f" &&
667 identical "//user@host#f" &&
668 identical "//user:password@host#f" &&
669 identical "//user@host:99#f" &&
670 identical "//user:password@host:99#f" &&
678 identical "/a;a;b" &&
679 identical "/a/;a;b" &&
680 identical "/a/a;a;b" &&
682 identical "/?a=b&c=d" &&
683 identical "/a?a=b&c=d" &&
684 identical "/a/?a=b&c=d" &&
685 identical "/a/a?a=b&c=d" &&
687 identical "/;a;b?a=b&c=d" &&
688 identical "/a;a;b?a=b&c=d" &&
689 identical "/a/;a;b?a=b&c=d" &&
690 identical "/a/a;a;b?a=b&c=d" &&
695 identical "/a/a#f" &&
697 identical "/;a;b#f" &&
698 identical "/a;a;b#f" &&
699 identical "/a/;a;b#f" &&
700 identical "/a/a;a;b#f" &&
702 identical "/;a;b?a=b&c=d#f" &&
703 identical "/a;a;b?a=b&c=d#f" &&
704 identical "/a/;a;b?a=b&c=d#f" &&
705 identical "/a/a;a;b?a=b&c=d#f" &&
714 identical "a/;a;b" &&
715 identical "a/a;a;b" &&
717 identical "?a=b&c=d" &&
718 identical "a?a=b&c=d" &&
719 identical "a/?a=b&c=d" &&
720 identical "a/a?a=b&c=d" &&
722 identical ";a;b?a=b&c=d" &&
723 identical "a;a;b?a=b&c=d" &&
724 identical "a/;a;b?a=b&c=d" &&
725 identical "a/a;a;b?a=b&c=d" &&
732 identical ";a;b#f" &&
733 identical "a;a;b#f" &&
734 identical "a/;a;b#f" &&
735 identical "a/a;a;b#f" &&
737 identical ";a;b?a=b&c=d#f" &&
738 identical "a;a;b?a=b&c=d#f" &&
739 identical "a/;a;b?a=b&c=d#f" &&
740 identical "a/a;a;b?a=b&c=d#f" &&
747 identical "./;a;b" &&
748 identical "./a;a;b" &&
750 identical ".?a=b&c=d" &&
751 identical "./?a=b&c=d" &&
752 identical "./a?a=b&c=d" &&
754 identical ".;a;b?a=b&c=d" &&
755 identical "./;a;b?a=b&c=d" &&
756 identical "./a;a;b?a=b&c=d" &&
762 identical ".;a;b#f" &&
763 identical "./;a;b#f" &&
764 identical "./a;a;b#f" &&
766 identical ".;a;b?a=b&c=d#f" &&
767 identical "./;a;b?a=b&c=d#f" &&
768 identical "./a;a;b?a=b&c=d#f" &&
774 identical "..;a;b" &&
775 identical "../;a;b" &&
776 identical "../a;a;b" &&
778 identical "..?a=b&c=d" &&
779 identical "../?a=b&c=d" &&
780 identical "../a?a=b&c=d" &&
782 identical "..;a;b?a=b&c=d" &&
783 identical "../;a;b?a=b&c=d" &&
784 identical "../a;a;b?a=b&c=d" &&
788 identical "../a#f" &&
790 identical "..;a;b#f" &&
791 identical "../;a;b#f" &&
792 identical "../a;a;b#f" &&
794 identical "..;a;b?a=b&c=d#f" &&
795 identical "../;a;b?a=b&c=d#f" &&
796 identical "../a;a;b?a=b&c=d#f" &&
799 (make_url ~path:["a:b"] ip_url_syntax) = "a%3Ab" &&
802 (make_url ~encoded:true ~path:["a:b"] ip_url_syntax) = "./a:b" &&
810 let mailto_syn = Hashtbl.find common_url_syntax "mailto" in
813 string_of_url (url_of_string mailto_syn s) = s in
816 try ignore(url_of_string mailto_syn s); false
817 with Malformed_URL -> true
820 identical "mailto:user@host" &&
821 identical "mailto:user@host;?;?" &&
822 fails "mailto:user@host#f"
825 (**********************************************************************)
826 (* split_path/join_path/norm_path: *)
827 (**********************************************************************)
830 (split_path "" = []) &&
831 (split_path "/" = [ "" ]) &&
832 (split_path "/a" = [ ""; "a" ]) &&
833 (split_path "a" = [ "a" ]) &&
834 (split_path "a/" = [ "a"; "" ]) &&
835 (split_path "/a/" = [ ""; "a"; "" ]) &&
836 (split_path "/a/b" = [ ""; "a"; "b" ]) &&
837 (split_path "/a/b/" = [ ""; "a"; "b"; "" ]) &&
838 (split_path "/a/b/c" = [ ""; "a"; "b"; "c" ]) &&
840 (join_path [] = "") &&
841 (join_path [ "" ] = "/") &&
842 (join_path [ ""; "a" ] = "/a") &&
843 (join_path [ "a" ] = "a") &&
844 (join_path [ "a"; "" ] = "a/") &&
845 (join_path [ ""; "a"; "" ] = "/a/") &&
846 (join_path [ ""; "a"; "b" ] = "/a/b") &&
847 (join_path [ ""; "a"; "b"; "" ] = "/a/b/") &&
848 (join_path [ ""; "a"; "b"; "c" ] = "/a/b/c") &&
855 (norm_path ["."] = []) &&
856 (norm_path ["."; ""] = []) &&
857 (norm_path ["a"; "."] = ["a"; ""]) &&
858 (norm_path ["a"; "b"; "."] = ["a"; "b"; ""]) &&
859 (norm_path ["a"; "b"; ".."] = ["a"; ""]) &&
860 (norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]) &&
861 (norm_path [".."] = [".."; ""]) &&
862 (norm_path [".."; ""] = [".."; ""]) &&
863 (norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]) &&
864 (norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]) &&
865 (norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]) &&
866 (norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]) &&
867 (norm_path ["a"; ".."] = []) &&
868 (norm_path ["";""] = [""]) &&
869 (norm_path [""] = [""]) &&
870 (norm_path [] = []) &&
875 (**********************************************************************)
876 (* apply_relative_url: *)
877 (**********************************************************************)
880 (* Examples taken from RFC 1808 *)
881 let url = url_of_string ip_url_syntax in
882 let base = url "http://a/b/c/d;p?q#f" in
883 let aru = apply_relative_url base in
885 (aru (url "g:h") = url "g:h") &&
886 (aru (url "g") = url "http://a/b/c/g") &&
887 (aru (url "./g") = url "http://a/b/c/g") &&
888 (aru (url "g/") = url "http://a/b/c/g/") &&
889 (aru (url "/g") = url "http://a/g") &&
890 (aru (url "//g") = url "http://g") &&
891 (aru (url "?y") = url "http://a/b/c/d;p?y") &&
892 (aru (url "g?y") = url "http://a/b/c/g?y") &&
893 (aru (url "g?y/./x") = url "http://a/b/c/g?y/./x") &&
894 (aru (url "#s") = url "http://a/b/c/d;p?q#s") &&
895 (aru (url "g#s") = url "http://a/b/c/g#s") &&
896 (aru (url "g#s/./x") = url "http://a/b/c/g#s/./x") &&
897 (aru (url "g?y#s") = url "http://a/b/c/g?y#s") &&
898 (aru (url ";x") = url "http://a/b/c/d;x") &&
899 (aru (url "g;x") = url "http://a/b/c/g;x") &&
900 (aru (url "g;x?y#s") = url "http://a/b/c/g;x?y#s") &&
901 (aru (url ".") = url "http://a/b/c/") &&
902 (aru (url "./") = url "http://a/b/c/") &&
903 (aru (url "..") = url "http://a/b/") &&
904 (aru (url "../") = url "http://a/b/") &&
905 (aru (url "../g") = url "http://a/b/g") &&
906 (aru (url "../..") = url "http://a/") &&
907 (aru (url "../../") = url "http://a/") &&
908 (aru (url "../../g") = url "http://a/g") &&
910 (aru (url "") = url "http://a/b/c/d;p?q#f") &&
911 (aru (url "../../../g") = url "http://a/../g") &&
912 (aru (url "../../../../g") = url "http://a/../../g") &&
913 (aru (url "/./g") = url "http://a/./g") &&
914 (aru (url "/../g") = url "http://a/../g") &&
915 (aru (url "g.") = url "http://a/b/c/g.") &&
916 (aru (url ".g") = url "http://a/b/c/.g") &&
917 (aru (url "g..") = url "http://a/b/c/g..") &&
918 (aru (url "..g") = url "http://a/b/c/..g") &&
919 (aru (url "./../g") = url "http://a/b/g") &&
920 (aru (url "./g/.") = url "http://a/b/c/g/") &&
921 (aru (url "g/./h") = url "http://a/b/c/g/h") &&
922 (aru (url "g/../h") = url "http://a/b/c/h") &&
923 (aru (url "http:g") = url "http:g") &&
924 (aru (url "http:") = url "http:") &&
930 (**********************************************************************)
934 print_endline ("Test " ^ n ^ " ok")
936 print_endline ("Test " ^ n ^ " FAILED!!!!");