Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
-open Neturl;;
open Printf;;
+open Http_common;;
open Http_types;;
open Http_constants;;
-(*
-type url_syntax_option =
- Url_part_not_recognized
- | Url_part_allowed
- | Url_part_required
-
-* (1) scheme://user:password@host:port/path;params?query#fragment
-*)
-
-let request_uri_syntax = {
- url_enable_scheme = Url_part_not_recognized;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_not_recognized;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_required;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_allowed;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
-}
-
- (* convention:
- foo_RE_raw is the uncompiled regexp matching foo
- foo_RE is the compiled regexp matching foo
- is_foo is the predicate over string matching foo
- *)
-
-let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"
-let ctls_RE_raw = "\\x00-\\x1F\\x7F"
-let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+"
-let lws_RE_raw = "(\r\n)?[ \t]"
-let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""
-let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+"
-let field_content_RE_raw =
- sprintf
- "^(((%s)|(%s)|(%s))|(%s))*$"
- token_RE_raw
- separators_RE_raw
- quoted_string_RE_raw
- text_RE_raw
-(*
- (* following RFC 2616 specifications *)
-let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
-*)
- (* smarter implementation: TEXT production is included in the regexp below *)
-let field_value_RE_raw =
- sprintf
- "^((%s)|(%s)|(%s)|(%s))*$"
- token_RE_raw
- separators_RE_raw
- quoted_string_RE_raw
- lws_RE_raw
-
-let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$")
-let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$")
-
-let is_token s = Pcre.pmatch ~rex:token_RE s
-let is_field_name = is_token
-let is_field_value s = Pcre.pmatch ~rex:field_value_RE s
-
-let heal_header_name s =
- if not (is_field_name s) then raise (Invalid_header_name s) else ()
+let (bindings_sep, binding_sep, pieces_sep, header_sep) =
+ (Pcre.regexp "&", Pcre.regexp "=", Pcre.regexp " ", Pcre.regexp ":")
+let header_RE = Pcre.regexp "([^:]*):(.*)"
-let heal_header_value s =
- if not (is_field_value s) then raise (Invalid_header_value s) else ()
-
-let heal_header (name, value) =
- heal_header_name name;
- heal_header_value name
+let url_decode url = Netencoding.Url.decode ~plus:true url
(** given an HTTP like query string (e.g. "name1=value1&name2=value2&...")
@return a list of pairs [("name1", "value1"); ("name2", "value2")]
@raise Malformed_query if the string isn't a valid query string
@raise Malformed_query_part if some piece of the query isn't valid
*)
-let split_query_params =
- let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in
- let http_decode url = Netencoding.Url.decode ~plus:false url in
- fun ~query ->
- let bindings = Pcre.split ~rex:bindings_sep query in
- if List.length bindings < 1 then
- raise (Malformed_query query);
- List.map
- (fun binding ->
- match Pcre.split ~rex:binding_sep binding with
- | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query))
- | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b)
- | [a] -> (* 'a=' || 'a' *) (http_decode a, "")
- | _ -> raise (Malformed_query_part (binding, query)))
- bindings
+let split_query_params query =
+ let bindings = Pcre.split ~rex:bindings_sep query in
+ match bindings with
+ | [] -> raise (Malformed_query query)
+ | bindings ->
+ List.map
+ (fun binding ->
+ match Pcre.split ~rex:binding_sep binding with
+ | [ ""; b ] -> (* '=b' *)
+ raise (Malformed_query_part (binding, query))
+ | [ a; b ] -> (* 'a=b' *) (url_decode a, url_decode b)
+ | [ a ] -> (* 'a=' || 'a' *) (url_decode a, "")
+ | _ -> raise (Malformed_query_part (binding, query)))
+ bindings
+
+ (** internal, used by generic_input_line *)
+exception Line_completed;;
(** given an input channel and a separator
@return a line read from it (like Pervasives.input_line)
try
while true do
if !sep_pointer >= String.length sep then (* line completed *)
- raise End_of_file
+ raise Line_completed
else begin (* incomplete line: need to read more *)
let ch = input_char ic in
if ch = String.get sep !sep_pointer then (* next piece of sep *)
end
done;
assert false (* unreacheable statement *)
- with End_of_file ->
- if !line = "" then
- raise End_of_file
- else
- !line
-
-let parse_request =
- let patch_empty_path s = (if s = "" then "/" else s) in
- let pieces_sep = Pcre.regexp " " in
- fun ic ->
- let request_line = generic_input_line ~sep:crlf ~ic in
- match Pcre.split ~rex:pieces_sep request_line with
- | [meth; request_uri_raw; http_version] ->
- if meth <> "GET" then
- raise (Unsupported_method meth);
- (match http_version with
- | "HTTP/1.0" | "HTTP/1.1" -> ()
- | _ -> raise (Unsupported_HTTP_version http_version));
- let request_uri =
+ with Line_completed -> !line
+
+let patch_empty_path = function "" -> "/" | s -> s
+let debug_dump_request path params =
+ debug_print
+ (sprintf
+ "recevied request; path: %s; params: %s"
+ path
+ (String.concat ", " (List.map (fun (n, v) -> n ^ "=" ^ v) params)))
+
+let parse_request_fst_line ic =
+ let request_line = generic_input_line ~sep:crlf ~ic in
+ match Pcre.split ~rex:pieces_sep request_line with
+ | [ meth_raw; uri_raw; http_version_raw ] ->
+ (try
+ (method_of_string meth_raw, (* method *)
+ Http_parser_sanity.url_of_string uri_raw, (* uri *)
+ version_of_string http_version_raw) (* version *)
+ with Neturl.Malformed_URL -> raise (Malformed_request_URI uri_raw))
+ | _ -> raise (Malformed_request request_line)
+
+let parse_path uri = patch_empty_path (String.concat "/" (Neturl.url_path uri))
+let parse_query_get_params uri =
+ try (* act on HTTP encoded URIs *)
+ split_query_params (Neturl.url_query ~encoded:true uri)
+ with Not_found -> []
+
+let parse_headers ic =
+ (* consume also trailing "^\r\n$" line *)
+ let rec parse_headers' headers =
+ match generic_input_line ~sep:crlf ~ic with
+ | "" -> List.rev headers
+ | line ->
+ (let subs = Pcre.extract ~rex:header_RE line in
+ let header =
try
- url_of_string request_uri_syntax request_uri_raw
- with Malformed_URL ->
- raise (Malformed_request_URI request_uri_raw)
- in
- let path =
- patch_empty_path (String.concat "/" (url_path request_uri))
+ subs.(1)
+ with Invalid_argument "Array.get" -> raise (Invalid_header line)
in
- let query_params =
- try (* act on HTTP encoded URIs *)
- split_query_params (url_query ~encoded:true request_uri)
- with Not_found -> []
+ let value =
+ try
+ Http_parser_sanity.normalize_header_value subs.(2)
+ with Invalid_argument "Array.get" -> ""
in
- Http_common.debug_print
- (sprintf
- "recevied request; path: %s; params: %s"
- path
- (String.concat
- ", "
- (List.map (fun (n, v) -> n ^ "=" ^ v) query_params)));
- (path, query_params)
- | _ -> raise (Malformed_request request_line)
-
-let parse_request' ic =
- let (path, params) = parse_request ic in
- let clisockaddr = Http_misc.peername_of_in_channel ic in
- new Http_request.request ~path ~params ~clisockaddr
+ Http_parser_sanity.heal_header (header, value);
+ parse_headers' ((header, value) :: headers))
+ in
+ parse_headers' []
+
+let parse_request ic =
+ let (meth, uri, version) = parse_request_fst_line ic in
+ let path = parse_path uri in
+ let query_get_params = parse_query_get_params uri in
+ debug_dump_request path query_get_params;
+ (path, query_get_params)