open Printf
+let file_scheme_prefix = "file://"
+
let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
-let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
+let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] ("^" ^ file_scheme_prefix)
let dir_sep_RE = Pcre.regexp "/"
let heading_slash_RE = Pcre.regexp "^/"
-let is_local_url s = Pcre.pmatch ~rex:file_scheme_RE s
+let local_url =
+ let rex = Pcre.regexp ("^(" ^ file_scheme_prefix ^ ")(.*)(.gz)$") in
+ fun s ->
+ try
+ Some ((Pcre.extract ~rex s).(2))
+ with Not_found -> None
let bufsiz = 16384 (* for file system I/O *)
let tcp_bufsiz = 4096 (* for TCP I/O *)
failure reason *)
exception Mkdir_failure of string * string
- (** true for URI belonging to the "file://" scheme *)
-val is_local_url: string -> bool
+ (** @return Some localpart for URI belonging to the "file://" scheme, None for
+ * other URIs
+ * removes trailing ".gz", if any
+ * e.g.: local_url "file:///etc/passwd.gz" = Some "/etc/passwd"
+ * local_url "http://...." = None *)
+val local_url: string -> string option
(** "fold_left" like function on file lines, trailing newline is not passed to
the given function *)