(*
  OCaml HTTP - do it yourself (fully OCaml) HTTP daemon

  Copyright (C) <2002> Stefano Zacchiroli

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*)

(* TODO some useless function here *)
(* TODO remove is_http* from mli? *)

open Neturl;;
open Printf;;
open Http_types;;
open Http_constants;;

(*
  type url_syntax_option =
    Url_part_not_recognized | Url_part_allowed | Url_part_required

  * (1) scheme://user:password@host:port/path;params?query#fragment
*)

(** Neturl syntax used to parse the request URI of a request line: a path is
    required, a query is allowed, every other URL part is not recognized *)
let request_uri_syntax = {
  url_enable_scheme    = Url_part_not_recognized;
  url_enable_user      = Url_part_not_recognized;
  url_enable_password  = Url_part_not_recognized;
  url_enable_host      = Url_part_not_recognized;
  url_enable_port      = Url_part_not_recognized;
  url_enable_path      = Url_part_required;
  url_enable_param     = Url_part_not_recognized;
  url_enable_query     = Url_part_allowed;
  url_enable_fragment  = Url_part_not_recognized;
  url_enable_other     = Url_part_not_recognized;
  url_accepts_8bits    = false;
  url_is_valid         = (fun _ -> true);
}

module CharSet = Set.Make (Char)

(** create an "is in" predicate over a character set using an efficient,
    set-based implementation; the set is built once, when the list of
    characters is supplied, and shared by every later call of the predicate *)
let mk_char_predicate chars =
  let charset =
    List.fold_left (fun set c -> CharSet.add c set) CharSet.empty chars
  in
  fun c -> CharSet.mem c charset

(** @return true if the given character is one of the HTTP separators listed
    below, false otherwise *)
let is_http_separator =
  mk_char_predicate
    [ '('; ')'; '<'; '>'; '@'; ','; ';'; ':'; '\\'; '"'; '/'; '['; ']'; '?';
      '='; '{'; '}'; ' '; '\t' ]

(** @return true if the given character is an HTTP control character, i.e. its
    code is in the range 0..31 or is 127 *)
let is_http_ctl c =
  (* Char.code never returns a negative value: no lower bound check needed *)
  let code = Char.code c in
  code <= 31 || code = 127

  (* internal: used to implement is_* functions *)
exception Invalid_char;;

(** @return true if no character of the given string is a control character
    or a separator.
    NOTE(review): the empty string and bytes above 127 are accepted as well,
    which looks more liberal than the HTTP token grammar; confirm before
    tightening since heal_header depends on this function *)
let is_http_token s =
  try
    String.iter
      (fun c ->
        if is_http_ctl c || is_http_separator c then raise Invalid_char)
      s;
    true
  with Invalid_char -> false

(** @return true if the given string starts with linear white space: a space,
    a tab, or a CR LF pair followed by a space or a tab. Strings too short to
    hold such a prefix (the empty string included) yield false and never raise
    an exception *)
let is_http_lws s =
  let len = String.length s in
  (* explicit length checks: the text carried by the out of bounds exception
     differs between runtime versions, so it can't be matched reliably *)
  len > 0 &&
  (match s.[0] with
   | ' ' | '\t' -> true
   | '\r' -> len >= 3 && s.[1] = '\n' && (s.[2] = ' ' || s.[2] = '\t')
   | _ -> false)

(** field names are validated exactly as tokens *)
let is_http_field_name = is_http_token

(** @return true if the given string is acceptable as a header value: control
    characters other than tab are rejected unless they belong to a line
    folding (CR LF followed by a space or a tab) or occur inside a quoted
    string; every quoted string has to be closed *)
let is_http_field_value s =
  (* skip the remainder of a quoted string whose opening quote has already
     been consumed; return whether the closing quote has been found together
     with the characters following it *)
  let rec strip_quoted_string = function
    | [] -> (false, [])
    | '"' :: tl -> (true, tl)
    | '\\' :: _ :: tl ->
        (* quoted pair: a backslash escapes whatever character follows it,
           hence an escaped backslash can't hide the closing quote *)
        strip_quoted_string tl
    | _ :: tl -> strip_quoted_string tl
  in
  let rec is_http_field_value' = function
    | '\r' :: '\n' :: sp :: rest when (sp = ' ' || sp = '\t') ->
        (* strip LWS *)
        is_http_field_value' rest
    | c :: _ when (is_http_ctl c && c <> '\t') ->
        (* tab is both a control character and a separator: it is allowed,
           any other control character isn't *)
        false
    | '"' :: rest ->
        let (closed, rest) = strip_quoted_string rest in
        if not closed then false else is_http_field_value' rest
    | _ :: rest -> is_http_field_value' rest
    | [] -> true
  in
  is_http_field_value' (Http_misc.string_explode s)

(** check an header given as a (name, value) pair
    @return unit if both the name and the value are valid
    @raise Invalid_header if the name isn't a valid field name or the value
    isn't a valid field value; the exception carries the offending header
    rendered as name, colon, space, value *)
let heal_header (name, value) =
  if not (is_http_field_name name && is_http_field_value value) then
    raise (Invalid_header (name ^ ": " ^ value))

(** given an HTTP like query string (e.g. "name1=value1&name2=value2&...")
    @return a list of pairs [("name1", "value1"); ("name2", "value2")], both
    components are URL decoded (plus signs are left untouched)
    @raise Malformed_query if splitting the string yields no binding at all
    @raise Malformed_query_part if some piece of the query has an empty name
    or doesn't split in one or two parts around the equal sign *)
let split_query_params =
  (* regexps are compiled only once, when this module is initialized *)
  let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in
  let http_decode url = Netencoding.Url.decode ~plus:false url in
  fun ~query ->
    let bindings = Pcre.split ~rex:bindings_sep query in
    if bindings = [] then raise (Malformed_query query);
    List.map
      (fun binding ->
        match Pcre.split ~rex:binding_sep binding with
        | [""; _] -> (* '=b' *)
            raise (Malformed_query_part (binding, query))
        | [name; value] -> (* 'a=b' *)
            (http_decode name, http_decode value)
        | [name] -> (* 'a=' || 'a' *)
            (http_decode name, "")
        | _ -> raise (Malformed_query_part (binding, query)))
      bindings

(** given an input channel and a separator
    @return a line read from it (like Pervasives.input_line); reading stops
    as soon as the separator string has been read, or at end of file. The
    separator isn't included in the returned value; at end of file a trailing,
    incomplete separator is discarded too.
    Input is still performed char-by-char, but the line is accumulated in a
    Buffer so that building it takes linear time.
    @raise Failure if the separator is the empty string
    @raise End_of_file if the resulting line is empty. NOTE: this holds also
    for an empty line properly terminated by the separator; callers may rely
    on that, hence it is preserved *)
let generic_input_line ~sep ~ic =
  let sep_len = String.length sep in
  if sep_len < 1 then
    failwith ("Separator '" ^ sep ^ "' is too short!")
  else begin (* valid separator *)
    let buf = Buffer.create 80 in
    let last_sep_char = sep.[sep_len - 1] in
    (* does the text read so far end with the whole separator? Looking at the
       tail of the buffer, instead of tracking a position inside sep, finds
       also a separator starting right after a failed partial match (e.g.
       CR CR LF when the separator is CR LF) *)
    let ends_with_sep () =
      let len = Buffer.length buf in
      len >= sep_len && Buffer.sub buf (len - sep_len) sep_len = sep
    in
    (* length of the longest proper prefix of sep the buffer ends with *)
    let pending_sep_len () =
      let len = Buffer.length buf in
      let rec probe k =
        if k = 0 then 0
        else if k <= len && Buffer.sub buf (len - k) k = String.sub sep 0 k
        then k
        else probe (k - 1)
      in
      probe (sep_len - 1)
    in
    let rec read () =
      match (try Some (input_char ic) with End_of_file -> None) with
      | None -> (* end of input: drop a separator left incomplete *)
          Buffer.sub buf 0 (Buffer.length buf - pending_sep_len ())
      | Some ch ->
          Buffer.add_char buf ch;
          (* cheap test first: a separator can only end on its last char *)
          if ch = last_sep_char && ends_with_sep () then
            Buffer.sub buf 0 (Buffer.length buf - sep_len)
          else
            read ()
    in
    let line = read () in
    if line = "" then raise End_of_file else line
  end

(** given an input channel, reads from it a GET HTTP request and
    @return a pair <path, query_params> where path is a string representing
    the requested path (an empty path is turned into a slash) and query_params
    is a list of pairs <name, value> (the GET parameters); the list is empty
    when Not_found is raised while extracting the query
    @raise Malformed_request if the request line isn't made of exactly three
    pieces separated by single spaces
    @raise Unsupported_method if the method isn't GET
    @raise Unsupported_HTTP_version if the version is neither HTTP/1.0 nor
    HTTP/1.1
    @raise Malformed_request_URI if the request URI can't be parsed
    @raise End_of_file if no request line can be read, see generic_input_line;
    exceptions raised by split_query_params are propagated as well *)
let parse_request =
  let patch_empty_path s = (if s = "" then "/" else s) in
  let pieces_sep = Pcre.regexp " " in
  fun ic ->
    let request_line = generic_input_line ~sep:crlf ~ic in
    match Pcre.split ~rex:pieces_sep request_line with
    | [meth; request_uri_raw; http_version] ->
        if meth <> "GET" then
          raise (Unsupported_method meth);
        (match http_version with
         | "HTTP/1.0" | "HTTP/1.1" -> ()
         | _ -> raise (Unsupported_HTTP_version http_version));
        let request_uri =
          try
            url_of_string request_uri_syntax request_uri_raw
          with Malformed_URL ->
            raise (Malformed_request_URI request_uri_raw)
        in
        let path =
          patch_empty_path (String.concat "/" (url_path request_uri))
        in
        let query_params =
          try (* act on HTTP encoded URIs *)
            split_query_params (url_query ~encoded:true request_uri)
          with Not_found -> []
        in
        Http_common.debug_print
          (sprintf "recevied request; path: %s; params: %s"
            path
            (String.concat ", "
              (List.map (fun (n, v) -> n ^ "=" ^ v) query_params)));
        (path, query_params)
    | _ -> raise (Malformed_request request_line)