Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
-(* TODO some useless function here *)
-(* TODO remove is_http* from mli? *)
-
open Neturl;;
open Printf;;
url_is_valid = (fun _ -> true);
}
-module CharSet = Set.Make (Char)
-
- (** create an "is in" predicate over a character set using an efficient,
- set-based implementation *)
-let mk_char_predicate chars =
- let charset =
- List.fold_left (fun oldset c -> CharSet.add c oldset) CharSet.empty chars
- in
- fun c -> CharSet.mem c charset
-
-let is_http_separator =
- mk_char_predicate
- [ '('; ')'; '<'; '>'; '@'; ','; ';'; ':'; '\\'; '"'; '/'; '['; ']'; '?';
- '='; '{'; '}'; ' '; '\t' ]
-
-let is_http_ctl c =
- match Char.code c with
- | c when (((c >= 0) && (c <= 31)) || (c = 127)) -> true
- | _ -> false
-
- (* internal: used to implement is_* functions *)
-exception Invalid_char;;
-
-let is_http_token s =
- try
- String.iter
- (fun c ->
- if (is_http_ctl c) || (is_http_separator) c then raise Invalid_char)
- s;
- true
- with Invalid_char -> false
-
-let rec is_http_lws s =
- (match s.[0] with
- | ' ' | '\t' -> true
- | '\r' ->
- (try
- (s.[1] = '\n') && ((s.[2] = ' ') || (s.[2] = '\t'))
- with Invalid_argument "String.get" -> false)
- | _ -> false)
-
-let is_http_field_name = is_http_token
-
-let is_http_field_value s =
- let rec strip_quoted_string = function
- | [] -> (false, [])
- | '"' :: tl -> (true, tl)
- | '\\' :: '"' :: tl -> strip_quoted_string tl
- | hd :: tl -> strip_quoted_string tl
- in
- let rec is_http_field_value' = function
- | '\r' :: '\n' :: sp :: rest when (sp = ' ' || sp = '\t') -> (* strip LWS *)
- is_http_field_value' rest
- | c :: rest when (is_http_ctl c && c <> '\t') -> (* \t is in CTL /\ SEP *)
- false (* CTL aren't allowed *)
- | '"' :: rest ->
- let (valid, rest) = strip_quoted_string rest in
- if not valid then false else is_http_field_value' rest
- | c :: rest -> is_http_field_value' rest
- | [] -> true
- in is_http_field_value' (Http_misc.string_explode s)
+ (* convention:
+ foo_RE_raw is the uncompiled regexp matching foo
+ foo_RE is the compiled regexp matching foo
+ is_foo is the predicate over string matching foo
+ *)
+
+let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"
+let ctls_RE_raw = "\\x00-\\x1F\\x7F"
+let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+"
+let lws_RE_raw = "(\r\n)?[ \t]"
+let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""
+let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+"
+let field_content_RE_raw =
+ sprintf
+ "^(((%s)|(%s)|(%s))|(%s))*$"
+ token_RE_raw
+ separators_RE_raw
+ quoted_string_RE_raw
+ text_RE_raw
+(*
+ (* following RFC 2616 specifications *)
+let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
+*)
+ (* smarter implementation: TEXT production is included in the regexp below *)
+let field_value_RE_raw =
+ sprintf
+ "^((%s)|(%s)|(%s)|(%s))*$"
+ token_RE_raw
+ separators_RE_raw
+ quoted_string_RE_raw
+ lws_RE_raw
+
+let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$")
+let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$")
+
+let is_token s = Pcre.pmatch ~rex:token_RE s
+let is_field_name = is_token
+let is_field_value s = Pcre.pmatch ~rex:field_value_RE s
+
+let heal_header_name s =
+ if not (is_field_name s) then raise (Invalid_header_name s) else ()
+
+let heal_header_value s =
+ if not (is_field_value s) then raise (Invalid_header_value s) else ()
let heal_header (name, value) =
- if not (is_http_field_name name && is_http_field_value value) then
- raise (Invalid_header (name ^ ": " ^ value))
- else
- ()
+ heal_header_name name;
+ heal_header_value name
(** given an HTTP like query string (e.g. "name1=value1&name2=value2&...")
@return a list of pairs [("name1", "value1"); ("name2", "value2")]
@return a line read from it (like Pervasives.input_line)
line is returned only after reading a separator string; separator string isn't
included in the returned value
- FIXME what about efficiency?, input is performed char-by-char
+ TODO what about efficiency?, input is performed char-by-char
*)
let generic_input_line ~sep ~ic =
let sep_len = String.length sep in
else
!line
- (** given an input channel, reads from it a GET HTTP request and
- @return a pair <path, query_params> where path is a string representing the
- requested path and query_params is a list of pairs <name, value> (the GET
- parameters)
- *)
let parse_request =
let patch_empty_path s = (if s = "" then "/" else s) in
let pieces_sep = Pcre.regexp " " in
(path, query_params)
| _ -> raise (Malformed_request request_line)
+let parse_request' ic =
+ let (path, params) = parse_request ic in
+ let clisockaddr = Http_misc.peername_of_in_channel ic in
+ new Http_request.request ~path ~params ~clisockaddr
+