X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Focaml-http%2Fhttp_parser.ml;h=7f7b22349a907a75309a42eb315b11933306e6ae;hb=d4993a3584bd83a87f642e8766eb86c0a24b682b;hp=87b910010e5f9cbd8d3cf6b44a26fda85857df48;hpb=12acd487ae0e4b47f044e22dfe703843c79fe102;p=helm.git diff --git a/helm/DEVEL/ocaml-http/http_parser.ml b/helm/DEVEL/ocaml-http/http_parser.ml index 87b910010..7f7b22349 100644 --- a/helm/DEVEL/ocaml-http/http_parser.ml +++ b/helm/DEVEL/ocaml-http/http_parser.ml @@ -22,12 +22,8 @@ open Neturl;; open Printf;; -exception Malformed_query of string -exception Malformed_query_binding of string * string -exception Unsupported_method of string -exception Unsupported_HTTP_version of string -exception Malformed_request_URI of string -exception Malformed_request of string +open Http_types;; +open Http_constants;; (* type url_syntax_option = @@ -53,10 +49,59 @@ let request_uri_syntax = { url_is_valid = (fun _ -> true); } + (* convention: + foo_RE_raw is the uncompiled regexp matching foo + foo_RE is the compiled regexp matching foo + is_foo is the predicate over string matching foo + *) + +let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t" +let ctls_RE_raw = "\\x00-\\x1F\\x7F" +let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+" +let lws_RE_raw = "(\r\n)?[ \t]" +let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\"" +let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+" +let field_content_RE_raw = + sprintf + "^(((%s)|(%s)|(%s))|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + text_RE_raw +(* + (* following RFC 2616 specifications *) +let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*" +*) + (* smarter implementation: TEXT production is included in the regexp below *) +let field_value_RE_raw = + sprintf + "^((%s)|(%s)|(%s)|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + lws_RE_raw + +let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$") +let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$") + +let is_token s = Pcre.pmatch ~rex:token_RE s +let is_field_name = is_token +let is_field_value s = Pcre.pmatch ~rex:field_value_RE s + +let heal_header_name s = + if not (is_field_name s) then raise (Invalid_header_name s) else () + +let heal_header_value s = + if not (is_field_value s) then raise (Invalid_header_value s) else () + +let heal_header (name, value) = + heal_header_name name; + heal_header_value name + (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...") @return a list of pairs [("name1", "value1"); ("name2", "value2")] @raise Malformed_query if the string isn't a valid query string - @raise Malformed_query_binding if some piece of the query isn't valid + @raise Malformed_query_part if some piece of the query isn't valid *) let split_query_params = let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in @@ -67,19 +112,18 @@ let split_query_params = raise (Malformed_query query); List.map (fun binding -> - let pieces = Pcre.split ~rex:binding_sep binding in - if List.length pieces <> 2 then - raise (Malformed_query_binding (binding, query)); - (match pieces with - | [a; b] -> (http_decode a, http_decode b) - | _ -> assert false)) + match Pcre.split ~rex:binding_sep binding with + | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query)) + | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b) + | [a] -> (* 'a=' || 'a' *) (http_decode a, "") + | _ -> raise (Malformed_query_part (binding, query))) bindings (** given an input channel and a separator @return a line read from it (like Pervasives.input_line) line is returned only after reading a separator string; separator string isn't included in the returned value - FIXME what about efficiency?, input is performed char-by-char + TODO what about efficiency?, input is performed char-by-char *) let generic_input_line ~sep ~ic = let sep_len = String.length sep in @@ -112,16 +156,11 @@ let generic_input_line ~sep ~ic = else !line - (** given an input channel, reads from it a GET HTTP request and - @return a pair where path is a string representing the - requested path and query_params is a list of pairs (the GET - parameters) - *) let parse_request = let patch_empty_path s = (if s = "" then "/" else s) in let pieces_sep = Pcre.regexp " " in fun ic -> - let request_line = generic_input_line ~sep:Http_common.crlf ~ic in + let request_line = generic_input_line ~sep:crlf ~ic in match Pcre.split ~rex:pieces_sep request_line with | [meth; request_uri_raw; http_version] -> if meth <> "GET" then @@ -153,3 +192,8 @@ let parse_request = (path, query_params) | _ -> raise (Malformed_request request_line) +let parse_request' ic = + let (path, params) = parse_request ic in + let clisockaddr = Http_misc.peername_of_in_channel ic in + new Http_request.request ~path ~params ~clisockaddr +