(*
  OCaml HTTP - do it yourself (fully OCaml) HTTP daemon

  Copyright (C) <2002> Stefano Zacchiroli

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*)

open Neturl;;
open Printf;;

open Http_types;;
open Http_constants;;

(*
type url_syntax_option =
    Url_part_not_recognized
  | Url_part_allowed
  | Url_part_required

* (1) scheme://user:password@host:port/path;params?query#fragment
*)

(** URL syntax used to parse the request URI of a request line: the path is
    required, a query is allowed, every other URL part is not recognized.
    8 bit characters are not accepted and no additional validity check is
    performed ([url_is_valid] always answers [true]). *)
let request_uri_syntax = {
  url_enable_scheme    = Url_part_not_recognized;
  url_enable_user      = Url_part_not_recognized;
  url_enable_password  = Url_part_not_recognized;
  url_enable_host      = Url_part_not_recognized;
  url_enable_port      = Url_part_not_recognized;
  url_enable_path      = Url_part_required;
  url_enable_param     = Url_part_not_recognized;
  url_enable_query     = Url_part_allowed;
  url_enable_fragment  = Url_part_not_recognized;
  url_enable_other     = Url_part_not_recognized;
  url_accepts_8bits    = false;
  url_is_valid         = (fun _ -> true);
}

  (* convention:
      foo_RE_raw  is the uncompiled regexp matching foo
      foo_RE      is the compiled regexp matching foo
      is_foo      is the predicate over string matching foo

     NOTE: separators_RE_raw and ctls_RE_raw are not regexps on their own,
     they are the *contents* of a character class: they have to be enclosed
     in square brackets by whoever uses them (see token_RE_raw).
  *)

let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"
let ctls_RE_raw = "\\x00-\\x1F\\x7F"
let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+"
let lws_RE_raw = "(\r\n)?[ \t]"
let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""
let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+"
  (* the separators are spliced in as a character class: used as a plain
     sub-pattern they would match only the whole literal sequence of all the
     separators, never a single separator character *)
let field_content_RE_raw =
  sprintf
    "^(((%s)|([%s])|(%s))|(%s))*$"
    token_RE_raw
    separators_RE_raw
    quoted_string_RE_raw
    text_RE_raw
(*
  (* following RFC 2616 specifications *)
let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
*)
  (* smarter implementation: TEXT production is included in the regexp below *)
let field_value_RE_raw =
  sprintf
    "^((%s)|([%s])|(%s)|(%s))*$"
    token_RE_raw
    separators_RE_raw
    quoted_string_RE_raw
    lws_RE_raw

let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "$")
  (* field_value_RE_raw is already anchored at both ends *)
let field_value_RE = Pcre.regexp field_value_RE_raw

(** @return true if the whole string matches [token_RE] *)
let is_token s = Pcre.pmatch ~rex:token_RE s

(** header names are checked with the very same predicate used for tokens *)
let is_field_name = is_token

(** @return true if the whole string matches [field_value_RE] *)
let is_field_value s = Pcre.pmatch ~rex:field_value_RE s

(** check a header name, nothing is repaired: the name is only validated
  @raise Invalid_header_name if the given string is not a valid field name *)
let heal_header_name s =
  if not (is_field_name s) then raise (Invalid_header_name s)

(** check a header value, nothing is repaired: the value is only validated
  @raise Invalid_header_value if the given string is not a valid field value *)
let heal_header_value s =
  if not (is_field_value s) then raise (Invalid_header_value s)

(** check both components of a (name, value) header pair
  @raise Invalid_header_name if the name is not a valid field name
  @raise Invalid_header_value if the value is not a valid field value *)
let heal_header (name, value) =
  heal_header_name name;
  heal_header_value value

  (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...")
  @return a list of pairs [("name1", "value1"); ("name2", "value2")]
  @raise Malformed_query if the string isn't a valid query string
  @raise Malformed_query_part if some piece of the query isn't valid
  *)
let split_query_params =
  let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in
  let http_decode url = Netencoding.Url.decode ~plus:false url in
  fun ~query ->
    let bindings = Pcre.split ~rex:bindings_sep query in
    if bindings = [] then
      raise (Malformed_query query);
    List.map
      (fun binding ->
        match Pcre.split ~rex:binding_sep binding with
        | [""; _] -> (* '=b' *)
            raise (Malformed_query_part (binding, query))
        | [a; b]  -> (* 'a=b' *)
            (http_decode a, http_decode b)
        | [a]     -> (* 'a=' || 'a' *)
            (http_decode a, "")
        | _ -> raise (Malformed_query_part (binding, query)))
      bindings

  (** given an input channel and a separator
  @return a line read from it (like Pervasives.input_line)
  line is returned only after reading a separator string; separator string isn't
  included in the returned value. If the channel ends before a separator is
  found, the characters read so far are returned (a partially read separator
  pending at that moment is dropped).
  @raise Failure if the separator is the empty string
  @raise End_of_file if no line character is available; beware that this also
  happens for an empty line, i.e. when the separator is the first thing read:
  the two cases cannot be told apart by the caller
  TODO input is still performed char-by-char
  *)
let generic_input_line ~sep ~ic =
  let sep_len = String.length sep in
  if sep_len < 1 then
    failwith ("Separator '" ^ sep ^ "' is too short!")
  else begin (* valid separator *)
      (* line characters are accumulated in a buffer: appending to a string
         one character at a time is quadratic in the length of the line *)
    let line = Buffer.create 128 in
      (* amount of leading characters of sep matched by the most recently
         read characters; those characters are not (yet) stored in line *)
    let matched = ref 0 in
      (* length of the longest proper suffix of seen which is also a prefix of
         sep: those characters can still be the beginning of a separator *)
    let fallback seen =
      let seen_len = String.length seen in
      let rec longest k =
        if k = 0 || String.sub seen (seen_len - k) k = String.sub sep 0 k then
          k
        else
          longest (k - 1)
      in
      longest (seen_len - 1)
    in
    (try
      while !matched < sep_len do (* separator not completely read *)
        let ch = input_char ic in
        if ch = String.get sep !matched then  (* next piece of sep *)
          incr matched
        else if !matched = 0 then (* useful char, no pending piece of sep *)
          Buffer.add_char line ch
        else begin
            (* the pending piece of sep was not a separator after all: flush
               it to the line, but keep pending the characters which can still
               start a separator (the one just read included) *)
          let seen = String.sub sep 0 !matched ^ String.make 1 ch in
          let keep = fallback seen in
          Buffer.add_substring line seen 0 (String.length seen - keep);
          matched := keep
        end
      done
    with End_of_file -> ());  (* channel exhausted: return what we have *)
    if Buffer.length line = 0 then
      raise End_of_file
    else
      Buffer.contents line
  end

  (** read a request line from an input channel and parse it
  The line is read up to [crlf] and must be made of exactly three pieces
  separated by single spaces: method, request URI and HTTP version.
  @return a pair (path, query_params): the path of the request URI ("/" if it
  is empty) and the decoded (name, value) pairs of its query, the empty list
  if the URI has no query
  @raise Unsupported_method if the method is not GET
  @raise Unsupported_HTTP_version if the version is neither HTTP/1.0 nor
  HTTP/1.1
  @raise Malformed_request_URI if the request URI cannot be parsed
  @raise Malformed_request if the line is not made of three pieces
  Exceptions raised by [generic_input_line] and [split_query_params] are not
  caught here.
  *)
let parse_request =
  let patch_empty_path s = (if s = "" then "/" else s) in
  let pieces_sep = Pcre.regexp " " in
  fun ic ->
    let request_line = generic_input_line ~sep:crlf ~ic in
    match Pcre.split ~rex:pieces_sep request_line with
    | [meth; request_uri_raw; http_version] ->
        if meth <> "GET" then
          raise (Unsupported_method meth);
        (match http_version with
        | "HTTP/1.0" | "HTTP/1.1" -> ()
        | _ -> raise (Unsupported_HTTP_version http_version));
        let request_uri =
          try
            url_of_string request_uri_syntax request_uri_raw
          with Malformed_URL ->
            raise (Malformed_request_URI request_uri_raw)
        in
        let path =
          patch_empty_path (String.concat "/" (url_path request_uri))
        in
        let query_params =
          try (* act on HTTP encoded URIs *)
            split_query_params ~query:(url_query ~encoded:true request_uri)
          with Not_found -> []
        in
        Http_common.debug_print
          (sprintf
            "recevied request; path: %s; params: %s"
            path
            (String.concat
              ", "
              (List.map (fun (n, v) -> n ^ "=" ^ v) query_params)));
        (path, query_params)
    | _ -> raise (Malformed_request request_line)

  (** like [parse_request], but the parsed path and parameters are wrapped in
  a new [Http_request.request] object *)
let parse_request' ic =
  let (path, params) = parse_request ic in
  new Http_request.request ~path ~params