open Neturl;;
open Printf;;
-exception Malformed_query of string
-exception Malformed_query_binding of string * string
-exception Unsupported_method of string
-exception Unsupported_HTTP_version of string
-exception Malformed_request_URI of string
-exception Malformed_request of string
+open Http_types;;
+open Http_constants;;
(*
type url_syntax_option =
url_is_valid = (fun _ -> true);
}
- (** given a list of length 2
- @return a pair formed by the elements of the list
- @raise Assert_failure if the list length isn't 2
+ (* convention:
+ foo_RE_raw is the uncompiled regexp matching foo
+ foo_RE is the compiled regexp matching foo
+ is_foo is the predicate over string matching foo
*)
-let pair_of_2_sized_list = function
- | [a;b] -> (a,b)
- | _ -> assert false
+
+let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"
+let ctls_RE_raw = "\\x00-\\x1F\\x7F"
+let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+"
+let lws_RE_raw = "(\r\n)?[ \t]"
+let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""
+let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+"
+let field_content_RE_raw =
+ sprintf
+ "^(((%s)|(%s)|(%s))|(%s))*$"
+ token_RE_raw
+ separators_RE_raw
+ quoted_string_RE_raw
+ text_RE_raw
+(*
+ (* following RFC 2616 specifications *)
+let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
+*)
+ (* smarter implementation: TEXT production is included in the regexp below *)
+let field_value_RE_raw =
+ sprintf
+ "^((%s)|(%s)|(%s)|(%s))*$"
+ token_RE_raw
+ separators_RE_raw
+ quoted_string_RE_raw
+ lws_RE_raw
+
+let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$")
+let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$")
+
+let is_token s = Pcre.pmatch ~rex:token_RE s
+let is_field_name = is_token
+let is_field_value s = Pcre.pmatch ~rex:field_value_RE s
+
+let heal_header_name s =
+ if not (is_field_name s) then raise (Invalid_header_name s) else ()
+
+let heal_header_value s =
+ if not (is_field_value s) then raise (Invalid_header_value s) else ()
+
+let heal_header (name, value) =
+ heal_header_name name;
+ heal_header_value name
(** given an HTTP like query string (e.g. "name1=value1&name2=value2&...")
@return a list of pairs [("name1", "value1"); ("name2", "value2")]
@raise Malformed_query if the string isn't a valid query string
- @raise Malformed_query_binding if some piece of the query isn't valid
+ @raise Malformed_query_part if some piece of the query isn't valid
*)
let split_query_params =
let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in
+ let http_decode url = Netencoding.Url.decode ~plus:false url in
fun ~query ->
let bindings = Pcre.split ~rex:bindings_sep query in
if List.length bindings < 1 then
raise (Malformed_query query);
List.map
(fun binding ->
- let pieces = Pcre.split ~rex:binding_sep binding in
- if List.length pieces <> 2 then
- raise (Malformed_query_binding (binding, query));
- pair_of_2_sized_list pieces)
+ match Pcre.split ~rex:binding_sep binding with
+ | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query))
+ | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b)
+ | [a] -> (* 'a=' || 'a' *) (http_decode a, "")
+ | _ -> raise (Malformed_query_part (binding, query)))
bindings
(** given an input channel and a separator
@return a line read from it (like Pervasives.input_line)
line is returned only after reading a separator string; separator string isn't
included in the returned value
- FIXME what about efficiency?, input is performed char-by-char
+ TODO what about efficiency?, input is performed char-by-char
*)
let generic_input_line ~sep ~ic =
let sep_len = String.length sep in
else
!line
- (** given an input channel, reads from it a GET HTTP request and
- @return a pair <path, query_params> where path is a string representing the
- requested path and query_params is a list of pairs <name, value> (the GET
- parameters)
- *)
let parse_request =
let patch_empty_path s = (if s = "" then "/" else s) in
let pieces_sep = Pcre.regexp " " in
fun ic ->
- let request_line = generic_input_line ~sep:Http_common.crlf ~ic in
+ let request_line = generic_input_line ~sep:crlf ~ic in
match Pcre.split ~rex:pieces_sep request_line with
| [meth; request_uri_raw; http_version] ->
if meth <> "GET" then
patch_empty_path (String.concat "/" (url_path request_uri))
in
let query_params =
- try split_query_params (url_query request_uri) with Not_found -> []
+ try (* act on HTTP encoded URIs *)
+ split_query_params (url_query ~encoded:true request_uri)
+ with Not_found -> []
in
Http_common.debug_print
(sprintf
(path, query_params)
| _ -> raise (Malformed_request request_line)
+let parse_request' ic =
+ let (path, params) = parse_request ic in
+ let clisockaddr = Http_misc.peername_of_in_channel ic in
+ new Http_request.request ~path ~params ~clisockaddr
+