X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Focaml-http%2Fhttp_parser.ml;h=7f7b22349a907a75309a42eb315b11933306e6ae;hb=b95baa1e1bf6333558efbced2b055264f7a79fed;hp=3bf186fcbc2fae8c1da4fdb76a4a38adeab8fd49;hpb=db0eab65e607d5f499a6f5455d7fcd4130d0b727;p=helm.git diff --git a/helm/DEVEL/ocaml-http/http_parser.ml b/helm/DEVEL/ocaml-http/http_parser.ml index 3bf186fcb..7f7b22349 100644 --- a/helm/DEVEL/ocaml-http/http_parser.ml +++ b/helm/DEVEL/ocaml-http/http_parser.ml @@ -49,73 +49,54 @@ let request_uri_syntax = { url_is_valid = (fun _ -> true); } -module CharSet = Set.Make (Char) - - (** create an "is in" predicate over a character set using an efficient, - set-based implementation *) -let mk_char_predicate chars = - let charset = - List.fold_left (fun oldset c -> CharSet.add c oldset) CharSet.empty chars - in - fun c -> CharSet.mem c charset - -let is_http_separator = - mk_char_predicate - [ '('; ')'; '<'; '>'; '@'; ','; ';'; ':'; '\\'; '"'; '/'; '['; ']'; '?'; - '='; '{'; '}'; ' '; '\t' ] - -let is_http_ctl c = - match Char.code c with - | c when (((c >= 0) && (c <= 31)) || (c = 127)) -> true - | _ -> false - - (* internal: used to implement is_* functions *) -exception Invalid_char;; - -let is_http_token s = - try - String.iter - (fun c -> - if (is_http_ctl c) || (is_http_separator) c then raise Invalid_char) - s; - true - with Invalid_char -> false - -let rec is_http_lws s = - (match s.[0] with - | ' ' | '\t' -> true - | '\r' -> - (try - (s.[1] = '\n') && ((s.[2] = ' ') || (s.[2] = '\t')) - with Invalid_argument "String.get" -> false) - | _ -> false) - -let is_http_field_name = is_http_token - -let is_http_field_value s = - let rec strip_quoted_string = function - | [] -> (false, []) - | '"' :: tl -> (true, tl) - | '\\' :: '"' :: tl -> strip_quoted_string tl - | hd :: tl -> strip_quoted_string tl - in - let rec is_http_field_value' = function - | '\r' :: '\n' :: sp :: rest when (sp = ' ' || sp = '\t') -> (* strip LWS *) - is_http_field_value' rest - | c :: rest when (is_http_ctl c && c <> '\t') -> (* \t is in CTL /\ SEP *) - false (* CTL aren't allowed *) - | '"' :: rest -> - let (valid, rest) = strip_quoted_string rest in - if not valid then false else is_http_field_value' rest - | c :: rest -> is_http_field_value' rest - | [] -> true - in is_http_field_value' (Http_misc.string_explode s) + (* convention: + foo_RE_raw is the uncompiled regexp matching foo + foo_RE is the compiled regexp matching foo + is_foo is the predicate over string matching foo + *) + +let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t" +let ctls_RE_raw = "\\x00-\\x1F\\x7F" +let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+" +let lws_RE_raw = "(\r\n)?[ \t]" +let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\"" +let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+" +let field_content_RE_raw = + sprintf + "^(((%s)|(%s)|(%s))|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + text_RE_raw +(* + (* following RFC 2616 specifications *) +let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*" +*) + (* smarter implementation: TEXT production is included in the regexp below *) +let field_value_RE_raw = + sprintf + "^((%s)|(%s)|(%s)|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + lws_RE_raw + +let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$") +let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$") + +let is_token s = Pcre.pmatch ~rex:token_RE s +let is_field_name = is_token +let is_field_value s = Pcre.pmatch ~rex:field_value_RE s + +let heal_header_name s = + if not (is_field_name s) then raise (Invalid_header_name s) else () + +let heal_header_value s = + if not (is_field_value s) then raise (Invalid_header_value s) else () let heal_header (name, value) = - if not (is_http_field_name name && is_http_field_value value) then - raise (Invalid_header (name ^ ": " ^ value)) - else - () + heal_header_name name; + heal_header_value name (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...") @return a list of pairs [("name1", "value1"); ("name2", "value2")] @@ -142,7 +123,7 @@ let split_query_params = @return a line read from it (like Pervasives.input_line) line is returned only after reading a separator string; separator string isn't included in the returned value - FIXME what about efficiency?, input is performed char-by-char + TODO what about efficiency?, input is performed char-by-char *) let generic_input_line ~sep ~ic = let sep_len = String.length sep in @@ -175,11 +156,6 @@ let generic_input_line ~sep ~ic = else !line - (** given an input channel, reads from it a GET HTTP request and - @return a pair where path is a string representing the - requested path and query_params is a list of pairs (the GET - parameters) - *) let parse_request = let patch_empty_path s = (if s = "" then "/" else s) in let pieces_sep = Pcre.regexp " " in @@ -216,3 +192,8 @@ let parse_request = (path, query_params) | _ -> raise (Malformed_request request_line) +let parse_request' ic = + let (path, params) = parse_request ic in + let clisockaddr = Http_misc.peername_of_in_channel ic in + new Http_request.request ~path ~params ~clisockaddr +