X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Focaml-http%2Fhttp_parser.ml;h=1113b701edebb87d89c7d83efcb55f406f64327c;hb=ca9cd0aeee0ce78a891f7f6091ca8704231a446d;hp=7f7b22349a907a75309a42eb315b11933306e6ae;hpb=b5703de4f7697426ebda94fce2558add7c4f0285;p=helm.git diff --git a/helm/DEVEL/ocaml-http/http_parser.ml b/helm/DEVEL/ocaml-http/http_parser.ml index 7f7b22349..1113b701e 100644 --- a/helm/DEVEL/ocaml-http/http_parser.ml +++ b/helm/DEVEL/ocaml-http/http_parser.ml @@ -19,105 +19,40 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) -open Neturl;; open Printf;; +open Http_common;; open Http_types;; open Http_constants;; -(* -type url_syntax_option = - Url_part_not_recognized - | Url_part_allowed - | Url_part_required - -* (1) scheme://user:password@host:port/path;params?query#fragment -*) - -let request_uri_syntax = { - url_enable_scheme = Url_part_not_recognized; - url_enable_user = Url_part_not_recognized; - url_enable_password = Url_part_not_recognized; - url_enable_host = Url_part_not_recognized; - url_enable_port = Url_part_not_recognized; - url_enable_path = Url_part_required; - url_enable_param = Url_part_not_recognized; - url_enable_query = Url_part_allowed; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); -} - - (* convention: - foo_RE_raw is the uncompiled regexp matching foo - foo_RE is the compiled regexp matching foo - is_foo is the predicate over string matching foo - *) - -let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t" -let ctls_RE_raw = "\\x00-\\x1F\\x7F" -let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+" -let lws_RE_raw = "(\r\n)?[ \t]" -let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\"" -let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+" -let field_content_RE_raw = - sprintf - "^(((%s)|(%s)|(%s))|(%s))*$" - token_RE_raw - separators_RE_raw - quoted_string_RE_raw - text_RE_raw -(* - (* following RFC 2616 specifications *) -let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*" -*) - (* smarter implementation: TEXT production is included in the regexp below *) -let field_value_RE_raw = - sprintf - "^((%s)|(%s)|(%s)|(%s))*$" - token_RE_raw - separators_RE_raw - quoted_string_RE_raw - lws_RE_raw - -let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$") -let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "$") - -let is_token s = Pcre.pmatch ~rex:token_RE s -let is_field_name = is_token -let is_field_value s = Pcre.pmatch ~rex:field_value_RE s - -let heal_header_name s = - if not (is_field_name s) then raise (Invalid_header_name s) else () +let (bindings_sep, binding_sep, pieces_sep, header_sep) = + (Pcre.regexp "&", Pcre.regexp "=", Pcre.regexp " ", Pcre.regexp ":") +let header_RE = Pcre.regexp "([^:]*):(.*)" -let heal_header_value s = - if not (is_field_value s) then raise (Invalid_header_value s) else () - -let heal_header (name, value) = - heal_header_name name; - heal_header_value name +let url_decode url = Netencoding.Url.decode ~plus:true url (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...") @return a list of pairs [("name1", "value1"); ("name2", "value2")] @raise Malformed_query if the string isn't a valid query string @raise Malformed_query_part if some piece of the query isn't valid *) -let split_query_params = - let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in - let http_decode url = Netencoding.Url.decode ~plus:false url in - fun ~query -> - let bindings = Pcre.split ~rex:bindings_sep query in - if List.length bindings < 1 then - raise (Malformed_query query); - List.map - (fun binding -> - match Pcre.split ~rex:binding_sep binding with - | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query)) - | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b) - | [a] -> (* 'a=' || 'a' *) (http_decode a, "") - | _ -> raise (Malformed_query_part (binding, query))) - bindings +let split_query_params query = + let bindings = Pcre.split ~rex:bindings_sep query in + match bindings with + | [] -> raise (Malformed_query query) + | bindings -> + List.map + (fun binding -> + match Pcre.split ~rex:binding_sep binding with + | [ ""; b ] -> (* '=b' *) + raise (Malformed_query_part (binding, query)) + | [ a; b ] -> (* 'a=b' *) (url_decode a, url_decode b) + | [ a ] -> (* 'a=' || 'a' *) (url_decode a, "") + | _ -> raise (Malformed_query_part (binding, query))) + bindings + + (** internal, used by generic_input_line *) +exception Line_completed;; (** given an input channel and a separator @return a line read from it (like Pervasives.input_line) @@ -135,7 +70,7 @@ let generic_input_line ~sep ~ic = try while true do if !sep_pointer >= String.length sep then (* line completed *) - raise End_of_file + raise Line_completed else begin (* incomplete line: need to read more *) let ch = input_char ic in if ch = String.get sep !sep_pointer then (* next piece of sep *) @@ -150,50 +85,59 @@ let generic_input_line ~sep ~ic = end done; assert false (* unreacheable statement *) - with End_of_file -> - if !line = "" then - raise End_of_file - else - !line - -let parse_request = - let patch_empty_path s = (if s = "" then "/" else s) in - let pieces_sep = Pcre.regexp " " in - fun ic -> - let request_line = generic_input_line ~sep:crlf ~ic in - match Pcre.split ~rex:pieces_sep request_line with - | [meth; request_uri_raw; http_version] -> - if meth <> "GET" then - raise (Unsupported_method meth); - (match http_version with - | "HTTP/1.0" | "HTTP/1.1" -> () - | _ -> raise (Unsupported_HTTP_version http_version)); - let request_uri = + with Line_completed -> !line + +let patch_empty_path = function "" -> "/" | s -> s +let debug_dump_request path params = + debug_print + (sprintf + "recevied request; path: %s; params: %s" + path + (String.concat ", " (List.map (fun (n, v) -> n ^ "=" ^ v) params))) + +let parse_request_fst_line ic = + let request_line = generic_input_line ~sep:crlf ~ic in + match Pcre.split ~rex:pieces_sep request_line with + | [ meth_raw; uri_raw; http_version_raw ] -> + (try + (method_of_string meth_raw, (* method *) + Http_parser_sanity.url_of_string uri_raw, (* uri *) + version_of_string http_version_raw) (* version *) + with Neturl.Malformed_URL -> raise (Malformed_request_URI uri_raw)) + | _ -> raise (Malformed_request request_line) + +let parse_path uri = patch_empty_path (String.concat "/" (Neturl.url_path uri)) +let parse_query_get_params uri = + try (* act on HTTP encoded URIs *) + split_query_params (Neturl.url_query ~encoded:true uri) + with Not_found -> [] + +let parse_headers ic = + (* consume also trailing "^\r\n$" line *) + let rec parse_headers' headers = + match generic_input_line ~sep:crlf ~ic with + | "" -> List.rev headers + | line -> + (let subs = Pcre.extract ~rex:header_RE line in + let header = try - url_of_string request_uri_syntax request_uri_raw - with Malformed_URL -> - raise (Malformed_request_URI request_uri_raw) - in - let path = - patch_empty_path (String.concat "/" (url_path request_uri)) + subs.(1) + with Invalid_argument "Array.get" -> raise (Invalid_header line) in - let query_params = - try (* act on HTTP encoded URIs *) - split_query_params (url_query ~encoded:true request_uri) - with Not_found -> [] + let value = + try + Http_parser_sanity.normalize_header_value subs.(2) + with Invalid_argument "Array.get" -> "" in - Http_common.debug_print - (sprintf - "recevied request; path: %s; params: %s" - path - (String.concat - ", " - (List.map (fun (n, v) -> n ^ "=" ^ v) query_params))); - (path, query_params) - | _ -> raise (Malformed_request request_line) - -let parse_request' ic = - let (path, params) = parse_request ic in - let clisockaddr = Http_misc.peername_of_in_channel ic in - new Http_request.request ~path ~params ~clisockaddr + Http_parser_sanity.heal_header (header, value); + parse_headers' ((header, value) :: headers)) + in + parse_headers' [] + +let parse_request ic = + let (meth, uri, version) = parse_request_fst_line ic in + let path = parse_path uri in + let query_get_params = parse_query_get_params uri in + debug_dump_request path query_get_params; + (path, query_get_params)