From 37f3d7291d4260eb0bc5e6e26afae557f324e587 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Wed, 4 Dec 2002 08:45:54 +0000 Subject: [PATCH] - use Pcre to perform sanity checks - removed is_http* from interface, export only heal_header --- helm/DEVEL/ocaml-http/http_parser.ml | 95 ++++++++------------------- helm/DEVEL/ocaml-http/http_parser.mli | 2 - 2 files changed, 29 insertions(+), 68 deletions(-) diff --git a/helm/DEVEL/ocaml-http/http_parser.ml b/helm/DEVEL/ocaml-http/http_parser.ml index 433754c5c..164f41324 100644 --- a/helm/DEVEL/ocaml-http/http_parser.ml +++ b/helm/DEVEL/ocaml-http/http_parser.ml @@ -19,9 +19,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) -(* TODO some useless function here *) -(* TODO remove is_http* from mli? *) - open Neturl;; open Printf;; @@ -52,70 +49,36 @@ let request_uri_syntax = { url_is_valid = (fun _ -> true); } -module CharSet = Set.Make (Char) - - (** create an "is in" predicate over a character set using an efficient, - set-based implementation *) -let mk_char_predicate chars = - let charset = - List.fold_left (fun oldset c -> CharSet.add c oldset) CharSet.empty chars - in - fun c -> CharSet.mem c charset - -let is_http_separator = - mk_char_predicate - [ '('; ')'; '<'; '>'; '@'; ','; ';'; ':'; '\\'; '"'; '/'; '['; ']'; '?'; - '='; '{'; '}'; ' '; '\t' ] - -let is_http_ctl c = - match Char.code c with - | c when (((c >= 0) && (c <= 31)) || (c = 127)) -> true - | _ -> false - - (* internal: used to implement is_* functions *) -exception Invalid_char;; - -let is_http_token s = - try - String.iter - (fun c -> - if (is_http_ctl c) || (is_http_separator) c then raise Invalid_char) - s; - true - with Invalid_char -> false - -let rec is_http_lws s = - (match s.[0] with - | ' ' | '\t' -> true - | '\r' -> - (try - (s.[1] = '\n') && ((s.[2] = ' ') || (s.[2] = '\t')) - with Invalid_argument "String.get" -> false) - | _ -> false) - -let is_http_field_name = is_http_token - -let is_http_field_value s = - let rec strip_quoted_string = function - | [] -> (false, []) - | '"' :: tl -> (true, tl) - | '\\' :: '"' :: tl -> strip_quoted_string tl - | hd :: tl -> strip_quoted_string tl - in - let rec is_http_field_value' = function - | '\r' :: '\n' :: sp :: rest when (sp = ' ' || sp = '\t') -> (* strip LWS *) - is_http_field_value' rest - | c :: rest when (is_http_ctl c && c <> '\t') -> (* \t is in CTL /\ SEP *) - false (* CTL aren't allowed *) - | '"' :: rest -> - let (valid, rest) = strip_quoted_string rest in - if not valid then false else is_http_field_value' rest - | c :: rest -> is_http_field_value' rest - | [] -> true - in is_http_field_value' (Http_misc.string_explode s) + (* convention: + foo_RE_raw is the uncompiled regexp matching foo + foo_RE is the compiled regexp matching foo + is_foo is the predicate over string matching foo + *) + +let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t" +let ctls_RE_raw = "\\x00-\\x1F\\x7F" +let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+" +let lws_RE_raw = "(\r\n)?[ \t]" +let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\"" +let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+" +let field_content_RE_raw = + sprintf + "^(((%s)|(%s)|(%s))|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + text_RE_raw +let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*" + +let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$") +let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$") + +let is_token s = Pcre.pmatch ~rex:token_RE s +let is_field_name = is_token +let is_field_value s = Pcre.pmatch ~rex:field_value_RE s let heal_header (name, value) = - if not (is_http_field_name name && is_http_field_value value) then + if not (is_field_name name && is_field_value value) then raise (Invalid_header (name ^ ": " ^ value)) else () @@ -145,7 +108,7 @@ let split_query_params = @return a line read from it (like Pervasives.input_line) line is returned only after reading a separator string; separator string isn't included in the returned value - FIXME what about efficiency?, input is performed char-by-char + TODO what about efficiency?, input is performed char-by-char *) let generic_input_line ~sep ~ic = let sep_len = String.length sep in diff --git a/helm/DEVEL/ocaml-http/http_parser.mli b/helm/DEVEL/ocaml-http/http_parser.mli index 76f102172..8aa5562bd 100644 --- a/helm/DEVEL/ocaml-http/http_parser.mli +++ b/helm/DEVEL/ocaml-http/http_parser.mli @@ -19,8 +19,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) -val is_http_field_name: string -> bool -val is_http_field_value: string -> bool val heal_header: string * string -> unit val parse_request: in_channel -> string * (string * string) list -- 2.39.2