X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Focaml-http%2Fhttp_parser.ml;h=7f7b22349a907a75309a42eb315b11933306e6ae;hb=6de3b960806c544b580a15fd28309eea0c319771;hp=a753e40bbb12cfe5e67df2c5701b2fa8d826b65f;hpb=697d0d8857366485238a67386d0ce8f18404ac42;p=helm.git diff --git a/helm/DEVEL/ocaml-http/http_parser.ml b/helm/DEVEL/ocaml-http/http_parser.ml index a753e40bb..7f7b22349 100644 --- a/helm/DEVEL/ocaml-http/http_parser.ml +++ b/helm/DEVEL/ocaml-http/http_parser.ml @@ -20,13 +20,10 @@ *) open Neturl;; +open Printf;; -exception Malformed_query of string -exception Malformed_query_binding of string * string -exception Unsupported_method of string -exception Unsupported_HTTP_version of string -exception Malformed_request_URI of string -exception Malformed_request of string +open Http_types;; +open Http_constants;; (* type url_syntax_option = @@ -52,38 +49,81 @@ let request_uri_syntax = { url_is_valid = (fun _ -> true); } - (** given a list of length 2 - @return a pair formed by the elements of the list - @raise Assert_failure if the list length isn't 2 + (* convention: + foo_RE_raw is the uncompiled regexp matching foo + foo_RE is the compiled regexp matching foo + is_foo is the predicate over string matching foo *) -let pair_of_2_sized_list = function - | [a;b] -> (a,b) - | _ -> assert false + +let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t" +let ctls_RE_raw = "\\x00-\\x1F\\x7F" +let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+" +let lws_RE_raw = "(\r\n)?[ \t]" +let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\"" +let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+" +let field_content_RE_raw = + sprintf + "^(((%s)|(%s)|(%s))|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + text_RE_raw +(* + (* following RFC 2616 specifications *) +let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*" +*) + (* smarter implementation: TEXT production is included in the regexp below *) +let field_value_RE_raw = + sprintf + "^((%s)|(%s)|(%s)|(%s))*$" + token_RE_raw + separators_RE_raw + quoted_string_RE_raw + lws_RE_raw + +let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$") +let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$") + +let is_token s = Pcre.pmatch ~rex:token_RE s +let is_field_name = is_token +let is_field_value s = Pcre.pmatch ~rex:field_value_RE s + +let heal_header_name s = + if not (is_field_name s) then raise (Invalid_header_name s) else () + +let heal_header_value s = + if not (is_field_value s) then raise (Invalid_header_value s) else () + +let heal_header (name, value) = + heal_header_name name; + heal_header_value name (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...") @return a list of pairs [("name1", "value1"); ("name2", "value2")] @raise Malformed_query if the string isn't a valid query string - @raise Malformed_query_binding if some piece of the query isn't valid + @raise Malformed_query_part if some piece of the query isn't valid *) let split_query_params = let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in + let http_decode url = Netencoding.Url.decode ~plus:false url in fun ~query -> let bindings = Pcre.split ~rex:bindings_sep query in if List.length bindings < 1 then raise (Malformed_query query); List.map (fun binding -> - let pieces = Pcre.split ~rex:binding_sep binding in - if List.length pieces <> 2 then - raise (Malformed_query_binding (binding, query)); - pair_of_2_sized_list pieces) + match Pcre.split ~rex:binding_sep binding with + | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query)) + | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b) + | [a] -> (* 'a=' || 'a' *) (http_decode a, "") + | _ -> raise (Malformed_query_part (binding, query))) bindings (** given an input channel and a separator @return a line read from it (like Pervasives.input_line) line is returned only after reading a separator string; separator string isn't included in the returned value - FIXME what about efficiency?, input is performed char-by-char + TODO what about efficiency?, input is performed char-by-char *) let generic_input_line ~sep ~ic = let sep_len = String.length sep in @@ -116,16 +156,11 @@ let generic_input_line ~sep ~ic = else !line - (** given an input channel, reads from it a GET HTTP request and - @return a pair where path is a string representing the - requested path and query_params is a list of pairs (the GET - parameters) - *) let parse_request = let patch_empty_path s = (if s = "" then "/" else s) in let pieces_sep = Pcre.regexp " " in fun ic -> - let request_line = generic_input_line ~sep:Http_common.crlf ~ic in + let request_line = generic_input_line ~sep:crlf ~ic in match Pcre.split ~rex:pieces_sep request_line with | [meth; request_uri_raw; http_version] -> if meth <> "GET" then @@ -143,8 +178,22 @@ let parse_request = patch_empty_path (String.concat "/" (url_path request_uri)) in let query_params = - try split_query_params (url_query request_uri) with Not_found -> [] + try (* act on HTTP encoded URIs *) + split_query_params (url_query ~encoded:true request_uri) + with Not_found -> [] in + Http_common.debug_print + (sprintf + "recevied request; path: %s; params: %s" + path + (String.concat + ", " + (List.map (fun (n, v) -> n ^ "=" ^ v) query_params))); (path, query_params) | _ -> raise (Malformed_request request_line) +let parse_request' ic = + let (path, params) = parse_request ic in + let clisockaddr = Http_misc.peername_of_in_channel ic in + new Http_request.request ~path ~params ~clisockaddr +