3 OCaml HTTP - do it yourself (fully OCaml) HTTP daemon
5 Copyright (C) <2002> Stefano Zacchiroli <zack@cs.unibo.it>
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 type url_syntax_option =
30 Url_part_not_recognized
34 * (1) scheme://user:password@host:port/path;params?query#fragment
(* URL syntax used when parsing the request URI (it is the first argument
   given to url_of_string further down in this file). Only two parts are
   enabled: the path, which is required, and the query string, which is
   allowed; every other part is marked as not recognized.
   NOTE(review): the field names look like those of Neturl's url_syntax
   record; confirm against the file's opens, which are not visible here. *)
37 let request_uri_syntax = {
38 url_enable_scheme = Url_part_not_recognized;
39 url_enable_user = Url_part_not_recognized;
40 url_enable_password = Url_part_not_recognized;
41 url_enable_host = Url_part_not_recognized;
42 url_enable_port = Url_part_not_recognized;
43 url_enable_path = Url_part_required; (* the only mandatory part *)
44 url_enable_param = Url_part_not_recognized;
45 url_enable_query = Url_part_allowed; (* the only optional part *)
46 url_enable_fragment = Url_part_not_recognized;
47 url_enable_other = Url_part_not_recognized;
48 url_accepts_8bits = false;
49 url_is_valid = (fun _ -> true); (* validation hook that accepts everything *)
53 foo_RE_raw is the uncompiled regexp matching foo
54 foo_RE is the compiled regexp matching foo
55 is_foo is the predicate over string matching foo
(* Character-class body listing the separator characters. It is spliced
   between square brackets when token_RE_raw is built below, so it is not a
   standalone pattern. *)
58 let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"
(* Character-class body for control characters: 0x00 to 0x1F, plus 0x7F. *)
59 let ctls_RE_raw = "\\x00-\\x1F\\x7F"
(* token: one or more characters that are neither separators nor controls *)
60 let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+"
(* linear white space: an optional CR LF followed by one space or tab *)
61 let lws_RE_raw = "(\r\n)?[ \t]"
(* quoted string: a double-quoted run whose items are either a single
   non-quote character or a backslash followed by a double quote *)
62 let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""
(* text: one or more items, each either a non-control character or a
   linear-white-space sequence *)
63 let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+"
(* Pattern for header field content, built from a format string with four
   alternatives.
   NOTE(review): the line applying the format and the four arguments that
   fill its slots are not visible in this excerpt.
   The pattern carries its own start and end anchors even though it is
   embedded inside field_value_RE_raw just below. *)
64 let field_content_RE_raw =
66 "^(((%s)|(%s)|(%s))|(%s))*$"
(* field value: any number of items, each either field content or linear
   white space *)
71 let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
(* Compiled form of token_RE_raw, anchored so that the whole subject must be
   a token. The end anchor is \z rather than $: in PCRE a plain $ also
   matches just before a trailing newline, which would let a name ending in
   a line feed pass validation. *)
let token_RE = Pcre.regexp ("^" ^ token_RE_raw ^ "\\z")
(* Compiled form of field_value_RE_raw, anchored so that the whole subject
   must be a field value. The end anchor is \z rather than $: in PCRE a
   plain $ also matches just before a trailing newline, which would let a
   value ending in a line feed pass validation. *)
let field_value_RE = Pcre.regexp ("^" ^ field_value_RE_raw ^ "\\z")
(** [is_token candidate] tells whether [candidate] matches [token_RE]. *)
let is_token candidate = Pcre.pmatch candidate ~rex:token_RE

(** Header field names are checked with the same predicate as tokens. *)
let is_field_name name = is_token name

(** [is_field_value candidate] tells whether [candidate] matches
    [field_value_RE]. *)
let is_field_value candidate = Pcre.pmatch candidate ~rex:field_value_RE
(* Validates a header given as a (name, value) pair.
   @raise Invalid_header carrying the name, a colon and a space, then the
   value, when the name is not a valid field name or the value is not a
   valid field value.
   NOTE(review): the branch taken for a valid header is not visible in this
   excerpt, so the result in that case is not documented here. *)
80 let heal_header (name, value) =
81 if not (is_field_name name && is_field_value value) then
82 raise (Invalid_header (name ^ ": " ^ value))
86 (** given an HTTP like query string (e.g. "name1=value1&name2=value2&...")
87 @return a list of pairs [("name1", "value1"); ("name2", "value2")]
88 @raise Malformed_query if the string isn't a valid query string
89 @raise Malformed_query_part if some piece of the query isn't valid
91 let split_query_params =
(* The two separator regexps are bound before any parameter is taken (the
   definition line has none), so they are not rebuilt on every call. *)
92 let (bindings_sep, binding_sep) = (Pcre.regexp "&", Pcre.regexp "=") in
(* NOTE(review): with ~plus:false the decoder presumably leaves plus signs
   untouched instead of turning them into spaces; confirm against the
   Netencoding documentation. *)
93 let http_decode url = Netencoding.Url.decode ~plus:false url in
(* first split the query into its ampersand-separated bindings *)
95 let bindings = Pcre.split ~rex:bindings_sep query in
(* a query yielding no binding at all is rejected as a whole *)
96 if List.length bindings < 1 then
97 raise (Malformed_query query);
(* then split each binding on the equals sign: an empty name or more than
   two pieces is an error, a missing value becomes the empty string *)
100 match Pcre.split ~rex:binding_sep binding with
101 | [""; b] -> (* '=b' *) raise (Malformed_query_part (binding, query))
102 | [a; b] -> (* 'a=b' *) (http_decode a, http_decode b)
103 | [a] -> (* 'a=' || 'a' *) (http_decode a, "")
104 | _ -> raise (Malformed_query_part (binding, query)))
107 (** given an input channel and a separator
108 @return a line read from it (like Pervasives.input_line)
109 line is returned only after reading a separator string; separator string isn't
110 included in the returned value
111 TODO: efficiency -- input is currently read one character at a time
(* Reads characters from ic one at a time, tracking how much of the separator
   sep has been matched and accumulating the other characters in line.
   Fails with Failure when the separator is rejected as too short.
   NOTE(review): several lines of this function (the length test, the loop
   header, the bodies of two branches and everything after the loop) are not
   visible in this excerpt, so the exact return value and end-of-input
   behaviour are not documented here. *)
113 let generic_input_line ~sep ~ic =
114 let sep_len = String.length sep in
116 failwith ("Separator '" ^ sep ^ "' is too short!")
117 else (* valid separator *)
(* index of the next separator character expected from the channel *)
119 let sep_pointer = ref 0 in
122 if !sep_pointer >= String.length sep then (* line completed *)
124 else begin (* incomplete line: need to read more *)
125 let ch = input_char ic in
126 if ch = String.get sep !sep_pointer then (* next piece of sep *)
128 else begin (* useful char *)
(* The separator prefix matched so far turned out to be ordinary data: copy
   it into the line ahead of the current character. *)
129 for i = 0 to !sep_pointer - 1 do
130 line := !line ^ (String.make 1 (String.get sep i))
(* NOTE(review): ch is appended as data without being compared against the
   first separator character, so a separator starting exactly at ch looks
   like it would be missed (for instance CR CR LF with a CR LF separator);
   confirm against the lines not shown here.
   NOTE(review): growing the line with ^ one character at a time copies the
   whole line on every append; a Buffer would avoid that. *)
133 line := !line ^ (String.make 1 ch)
137 assert false (* unreachable statement *)
144 (** given an input channel, reads from it a GET HTTP request and
145 @return a pair <path, query_params> where path is a string representing the
146 requested path and query_params is a list of pairs <name, value> (the GET
(* Body of the request-parsing function described by the comment just above;
   its defining line is not visible in this excerpt. *)
(* an empty path is replaced by the root path *)
150 let patch_empty_path s = (if s = "" then "/" else s) in
151 let pieces_sep = Pcre.regexp " " in
(* the request line is whatever precedes the crlf separator on the channel *)
153 let request_line = generic_input_line ~sep:crlf ~ic in
(* splitting on single spaces must yield exactly three pieces *)
154 match Pcre.split ~rex:pieces_sep request_line with
155 | [meth; request_uri_raw; http_version] ->
(* only the GET method is supported *)
156 if meth <> "GET" then
157 raise (Unsupported_method meth);
(* only HTTP/1.0 and HTTP/1.1 are accepted *)
158 (match http_version with
159 | "HTTP/1.0" | "HTTP/1.1" -> ()
160 | _ -> raise (Unsupported_HTTP_version http_version));
(* a URI rejected by url_of_string is reported as Malformed_request_URI *)
163 url_of_string request_uri_syntax request_uri_raw
164 with Malformed_URL ->
165 raise (Malformed_request_URI request_uri_raw)
(* path components are joined with slashes *)
168 patch_empty_path (String.concat "/" (url_path request_uri))
171 try (* act on HTTP encoded URIs *)
172 split_query_params (url_query ~encoded:true request_uri)
175 Http_common.debug_print
(* NOTE(review): the message below misspells received; it is left untouched
   here because it is runtime text. *)
177 "recevied request; path: %s; params: %s"
181 (List.map (fun (n, v) -> n ^ "=" ^ v) query_params)));
(* any other number of pieces is a malformed request line *)
183 | _ -> raise (Malformed_request request_line)