(*
  OCaml HTTP - do it yourself (fully OCaml) HTTP daemon

  Copyright (C) <2002-2004> Stefano Zacchiroli

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
  USA
*)

open Neturl;;
open Printf;;

open Http_types;;
open Http_constants;;

(*
  type url_syntax_option =
      Url_part_not_recognized | Url_part_allowed | Url_part_required

  * (1) scheme://user:password@host:port/path;params?query#fragment
*)

(** Neturl syntax used for request URIs: the path is required, a query
    string is allowed, every other URL part is not recognized, and 8-bit
    characters are not accepted. *)
let request_uri_syntax = {
  url_enable_scheme    = Url_part_not_recognized;
  url_enable_user      = Url_part_not_recognized;
  url_enable_password  = Url_part_not_recognized;
  url_enable_host      = Url_part_not_recognized;
  url_enable_port      = Url_part_not_recognized;
  url_enable_path      = Url_part_required;
  url_enable_param     = Url_part_not_recognized;
  url_enable_query     = Url_part_allowed;
  url_enable_fragment  = Url_part_not_recognized;
  url_enable_other     = Url_part_not_recognized;
  url_accepts_8bits    = false;
  url_is_valid         = (fun _ -> true);
}

(* convention:
     foo_RE_raw  is the uncompiled regexp matching foo
     foo_RE      is the compiled regexp matching foo
     is_foo      is the predicate over string matching foo
*)

(* Separator characters, written as the INSIDE of a character class (the
   brackets and the backslash are escaped for that context).  This string
   must always be spliced between [ and ], never used as a bare pattern. *)
let separators_RE_raw = "()<>@,;:\\\\\"/\\[\\]?={} \t"

(* Control characters, also written as the inside of a character class. *)
let ctls_RE_raw = "\\x00-\\x1F\\x7F"

(* One or more characters that are neither separators nor controls. *)
let token_RE_raw = "[^" ^ separators_RE_raw ^ ctls_RE_raw ^ "]+"

(* A single linear-white-space unit: optional CRLF, then a space or tab. *)
let lws_RE_raw = "(\r\n)?[ \t]"

let quoted_string_RE_raw = "\"(([^\"])|(\\\\\"))*\""

let text_RE_raw = "(([^" ^ ctls_RE_raw ^ "])|(" ^ lws_RE_raw ^ "))+"

(* The separators are wrapped in [%s]: spliced as a plain group they would
   be read as one literal character sequence instead of "any separator". *)
let field_content_RE_raw =
  sprintf "^(((%s)|([%s])|(%s))|(%s))*$"
    token_RE_raw
    separators_RE_raw
    quoted_string_RE_raw
    text_RE_raw

(*
  (* following RFC 2616 specifications *)
let field_value_RE_raw = "((" ^ field_content_RE_raw ^ ")|(" ^ lws_RE_raw^ "))*"
*)

(* smarter implementation: TEXT production is included in the regexp below.

   Two details matter here:
   - the separators are spliced as a character class, [%s]; as a plain
     group they formed a literal sequence, so any value containing a lone
     separator (a slash, a space, a semicolon, ...) was rejected;
   - the token alternative is an atomic group, (?>...): at the top level
     only the token alternative can consume a token character, so a run
     of token characters is always matched whole and there is no point in
     letting PCRE retry every possible split of it when a value is
     rejected.

   NOTE(review): space, tab and the double quote are still matched by more
   than one alternative, so rejected values holding many of them can still
   cause noticeable backtracking. *)
let field_value_RE_raw =
  sprintf "^((?>%s)|([%s])|(%s)|(%s))*$"
    token_RE_raw
    separators_RE_raw
    quoted_string_RE_raw
    lws_RE_raw

(* The validators are compiled with DOLLAR_ENDONLY: by default the dollar
   anchor also matches just before a final newline, which would let a
   string ending in a line feed pass as a token or as a field value. *)
let token_RE =
  Pcre.regexp ~flags:[`DOLLAR_ENDONLY] ("^" ^ token_RE_raw ^ "$")

(* field_value_RE_raw is already anchored on both sides. *)
let field_value_RE = Pcre.regexp ~flags:[`DOLLAR_ENDONLY] field_value_RE_raw

(* NOTE(review): lws_RE_raw is not parenthesised in the two patterns below,
   so the star binds only to its final [ \t] class.  What is matched is
   therefore an optional CRLF followed by any run of spaces and tabs, not
   a repetition of the whole LWS production.  Left untouched because
   callers may rely on a bare CRLF being stripped -- confirm before
   tightening. *)
let heading_lws_RE = Pcre.regexp (sprintf "^%s*" lws_RE_raw)
let trailing_lws_RE = Pcre.regexp (sprintf "%s*$" lws_RE_raw)

(** [is_token s] is [true] iff the whole of [s] matches the token
    pattern. *)
let is_token s = Pcre.pmatch ~rex:token_RE s

(** Header field names are validated as tokens. *)
let is_field_name = is_token

(** [is_field_value s] is [true] iff the whole of [s] matches the
    field-value pattern (tokens, separators, quoted strings and linear
    white space). *)
let is_field_value s = Pcre.pmatch ~rex:field_value_RE s

(** [heal_header_name s] checks that [s] is a valid header name.
    @raise Invalid_header_name if it is not. *)
let heal_header_name s =
  if not (is_field_name s) then raise (Invalid_header_name s)

(** [heal_header_value s] checks that [s] is a valid header value.
    @raise Invalid_header_value if it is not. *)
let heal_header_value s =
  if not (is_field_value s) then raise (Invalid_header_value s)

(** [normalize_header_value s] returns [s] with the text matched by
    [heading_lws_RE] and then by [trailing_lws_RE] removed. *)
let normalize_header_value s =
  let without_heading = Pcre.replace ~rex:heading_lws_RE s in
  Pcre.replace ~rex:trailing_lws_RE without_heading

(** [heal_header (name, value)] checks both halves of a header.
    @raise Invalid_header_name if [name] is not a valid header name.
    @raise Invalid_header_value if [value] is not a valid header value. *)
let heal_header (name, value) =
  heal_header_name name;
  (* The value must be checked here, not the name a second time. *)
  heal_header_value value

(** [url_of_string s] parses [s] with {!request_uri_syntax}.
    @raise Malformed_URL carrying [s] when Neturl rejects the string. *)
let url_of_string s =
  try Neturl.url_of_string request_uri_syntax s
  with Neturl.Malformed_URL -> raise (Malformed_URL s)

(** Same function as [Neturl.string_of_url]. *)
let string_of_url = Neturl.string_of_url