X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Focaml-http%2Fhttp_parser.ml;h=af371bb1ebd613c0bab0db53d3b68fd75dceafba;hb=4167cea65ca58897d1a3dbb81ff95de5074700cc;hp=433754c5c8938c7bee073c981ce6313d4327d576;hpb=24a81ae6fd39880b77916453134857e51145220f;p=helm.git diff --git a/helm/DEVEL/ocaml-http/http_parser.ml b/helm/DEVEL/ocaml-http/http_parser.ml index 433754c5c..af371bb1e 100644 --- a/helm/DEVEL/ocaml-http/http_parser.ml +++ b/helm/DEVEL/ocaml-http/http_parser.ml @@ -2,150 +2,58 @@ (* OCaml HTTP - do it yourself (fully OCaml) HTTP daemon - Copyright (C) <2002> Stefano Zacchiroli + Copyright (C) <2002-2005> Stefano Zacchiroli This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. + it under the terms of the GNU Library General Public License as + published by the Free Software Foundation, version 2. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + GNU Library General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA *) -(* TODO some useless function here *) -(* TODO remove is_http* from mli? *) - -open Neturl;; open Printf;; +open Http_common;; open Http_types;; open Http_constants;; -(* -type url_syntax_option = - Url_part_not_recognized - | Url_part_allowed - | Url_part_required - -* (1) scheme://user:password@host:port/path;params?query#fragment -*) - -let request_uri_syntax = { - url_enable_scheme = Url_part_not_recognized; - url_enable_user = Url_part_not_recognized; - url_enable_password = Url_part_not_recognized; - url_enable_host = Url_part_not_recognized; - url_enable_port = Url_part_not_recognized; - url_enable_path = Url_part_required; - url_enable_param = Url_part_not_recognized; - url_enable_query = Url_part_allowed; - url_enable_fragment = Url_part_not_recognized; - url_enable_other = Url_part_not_recognized; - url_accepts_8bits = false; - url_is_valid = (fun _ -> true); -} - -module CharSet = Set.Make (Char) - - (** create an "is in" predicate over a character set using an efficient, - set-based implementation *) -let mk_char_predicate chars = - let charset = - List.fold_left (fun oldset c -> CharSet.add c oldset) CharSet.empty chars - in - fun c -> CharSet.mem c charset - -let is_http_separator = - mk_char_predicate - [ '('; ')'; '<'; '>'; '@'; ','; ';'; ':'; '\\'; '"'; '/'; '['; ']'; '?'; - '='; '{'; '}'; ' '; '\t' ] - -let is_http_ctl c = - match Char.code c with - | c when (((c >= 0) && (c <= 31)) || (c = 127)) -> true - | _ -> false - - (* internal: used to implement is_* functions *) -exception Invalid_char;; - -let is_http_token s = - try - String.iter - (fun c -> - if (is_http_ctl c) || (is_http_separator) c then raise Invalid_char) - s; - true - with Invalid_char -> false - -let rec is_http_lws s = - (match s.[0] with - | ' ' | '\t' -> true - | '\r' -> - (try - (s.[1] = '\n') && ((s.[2] = ' ') || (s.[2] = '\t')) - with Invalid_argument "String.get" -> false) - | _ -> false) - -let is_http_field_name = is_http_token - -let is_http_field_value s = - let rec strip_quoted_string = function - | [] -> (false, []) - | '"' :: tl -> (true, tl) - | '\\' :: '"' :: tl -> strip_quoted_string tl - | hd :: tl -> strip_quoted_string tl - in - let rec is_http_field_value' = function - | '\r' :: '\n' :: sp :: rest when (sp = ' ' || sp = '\t') -> (* strip LWS *) - is_http_field_value' rest - | c :: rest when (is_http_ctl c && c <> '\t') -> (* \t is in CTL /\ SEP *) - false (* CTL aren't allowed *) - | '"' :: rest -> - let (valid, rest) = strip_quoted_string rest in - if not valid then false else is_http_field_value' rest - | c :: rest -> is_http_field_value' rest - | [] -> true - in is_http_field_value' (Http_misc.string_explode s) - -let heal_header (name, value) = - if not (is_http_field_name name && is_http_field_value value) then - raise (Invalid_header (name ^ ": " ^ value)) - else - () - - (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...") - @return a list of pairs [("name1", "value1"); ("name2", "value2")] - @raise Malformed_query if the string isn't a valid query string - @raise Malformed_query_part if some piece of the query isn't valid - *) -let split_query_params = - let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in - let http_decode url = Netencoding.Url.decode ~plus:false url in - fun ~query -> - let bindings = Pcre.split ~rex:bindings_sep query in - if List.length bindings < 1 then - raise (Malformed_query query); - List.map - (fun binding -> - match Pcre.split ~rex:binding_sep binding with - | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query)) - | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b) - | [a] -> (* 'a=' || 'a' *) (http_decode a, "") - | _ -> raise (Malformed_query_part (binding, query))) - bindings +let (bindings_sep, binding_sep, pieces_sep, header_sep) = + (Pcre.regexp "&", Pcre.regexp "=", Pcre.regexp " ", Pcre.regexp ":") +let header_RE = Pcre.regexp "([^:]*):(.*)" + +let url_decode url = Netencoding.Url.decode ~plus:true url + +let split_query_params query = + let bindings = Pcre.split ~rex:bindings_sep query in + match bindings with + | [] -> raise (Malformed_query query) + | bindings -> + List.map + (fun binding -> + match Pcre.split ~rex:binding_sep binding with + | [ ""; b ] -> (* '=b' *) + raise (Malformed_query_part (binding, query)) + | [ a; b ] -> (* 'a=b' *) (url_decode a, url_decode b) + | [ a ] -> (* 'a=' || 'a' *) (url_decode a, "") + | _ -> raise (Malformed_query_part (binding, query))) + bindings + + (** internal, used by generic_input_line *) +exception Line_completed;; (** given an input channel and a separator @return a line read from it (like Pervasives.input_line) line is returned only after reading a separator string; separator string isn't included in the returned value - FIXME what about efficiency?, input is performed char-by-char + TODO what about efficiency?, input is performed char-by-char *) let generic_input_line ~sep ~ic = let sep_len = String.length sep in @@ -157,7 +65,7 @@ let generic_input_line ~sep ~ic = try while true do if !sep_pointer >= String.length sep then (* line completed *) - raise End_of_file + raise Line_completed else begin (* incomplete line: need to read more *) let ch = input_char ic in if ch = String.get sep !sep_pointer then (* next piece of sep *) @@ -172,50 +80,81 @@ let generic_input_line ~sep ~ic = end done; assert false (* unreacheable statement *) - with End_of_file -> - if !line = "" then - raise End_of_file - else - !line - - (** given an input channel, reads from it a GET HTTP request and - @return a pair where path is a string representing the - requested path and query_params is a list of pairs (the GET - parameters) - *) -let parse_request = - let patch_empty_path s = (if s = "" then "/" else s) in - let pieces_sep = Pcre.regexp " " in - fun ic -> - let request_line = generic_input_line ~sep:crlf ~ic in - match Pcre.split ~rex:pieces_sep request_line with - | [meth; request_uri_raw; http_version] -> - if meth <> "GET" then - raise (Unsupported_method meth); - (match http_version with - | "HTTP/1.0" | "HTTP/1.1" -> () - | _ -> raise (Unsupported_HTTP_version http_version)); - let request_uri = + with Line_completed -> !line + +let patch_empty_path = function "" -> "/" | s -> s +let debug_dump_request path params = + debug_print + (sprintf + "recevied request; path: %s; params: %s" + path + (String.concat ", " (List.map (fun (n, v) -> n ^ "=" ^ v) params))) + +let parse_request_fst_line ic = + let request_line = generic_input_line ~sep:crlf ~ic in + debug_print (sprintf "HTTP request line (not yet parsed): %s" request_line); + try + (match Pcre.split ~rex:pieces_sep request_line with + | [ meth_raw; uri_raw ] -> (* ancient HTTP request line *) + (method_of_string meth_raw, (* method *) + Http_parser_sanity.url_of_string uri_raw, (* uri *) + None) (* no version given *) + | [ meth_raw; uri_raw; http_version_raw ] -> (* HTTP 1.{0,1} *) + (method_of_string meth_raw, (* method *) + Http_parser_sanity.url_of_string uri_raw, (* uri *) + Some (version_of_string http_version_raw)) (* version *) + | _ -> raise (Malformed_request request_line)) + with Malformed_URL url -> raise (Malformed_request_URI url) + +let parse_response_fst_line ic = + let response_line = generic_input_line ~sep:crlf ~ic in + debug_print (sprintf "HTTP response line (not yet parsed): %s" response_line); + try + (match Pcre.split ~rex:pieces_sep response_line with + | version_raw :: code_raw :: _ -> + (version_of_string version_raw, (* method *) + status_of_code (int_of_string code_raw)) (* status *) + | _ -> raise (Malformed_response response_line)) + with + | Malformed_URL _ | Invalid_code _ | Failure "int_of_string" -> + raise (Malformed_response response_line) + +let parse_path uri = patch_empty_path (String.concat "/" (Neturl.url_path uri)) +let parse_query_get_params uri = + try (* act on HTTP encoded URIs *) + split_query_params (Neturl.url_query ~encoded:true uri) + with Not_found -> [] + +let parse_headers ic = + (* consume also trailing "^\r\n$" line *) + let rec parse_headers' headers = + match generic_input_line ~sep:crlf ~ic with + | "" -> List.rev headers + | line -> + (let subs = try - url_of_string request_uri_syntax request_uri_raw - with Malformed_URL -> - raise (Malformed_request_URI request_uri_raw) + Pcre.extract ~rex:header_RE line + with Not_found -> raise (Invalid_header line) in - let path = - patch_empty_path (String.concat "/" (url_path request_uri)) + let header = + try + subs.(1) + with Invalid_argument "Array.get" -> raise (Invalid_header line) in - let query_params = - try (* act on HTTP encoded URIs *) - split_query_params (url_query ~encoded:true request_uri) - with Not_found -> [] + let value = + try + Http_parser_sanity.normalize_header_value subs.(2) + with Invalid_argument "Array.get" -> "" in - Http_common.debug_print - (sprintf - "recevied request; path: %s; params: %s" - path - (String.concat - ", " - (List.map (fun (n, v) -> n ^ "=" ^ v) query_params))); - (path, query_params) - | _ -> raise (Malformed_request request_line) + Http_parser_sanity.heal_header (header, value); + parse_headers' ((header, value) :: headers)) + in + parse_headers' [] + +let parse_request ic = + let (meth, uri, version) = parse_request_fst_line ic in + let path = parse_path uri in + let query_get_params = parse_query_get_params uri in + debug_dump_request path query_get_params; + (path, query_get_params)