open Printf
+(* URL scheme prefix for local files; shared by file_scheme_RE and local_url
+   so that both patterns are built from the same literal. *)
+let file_scheme_prefix = "file://"
+
let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
-let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
+let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] ("^" ^ file_scheme_prefix)
let dir_sep_RE = Pcre.regexp "/"
let heading_slash_RE = Pcre.regexp "^/"
+(* [local_url s] returns [Some path] when [s] starts with [file_scheme_prefix]
+   and ends with a literal ".gz", where [path] is the text found between the
+   prefix and the ".gz" suffix; it returns [None] when [s] does not match.
+   NOTE: unlike file_scheme_RE, this pattern is compiled without `CASELESS,
+   so the scheme is matched case-sensitively. *)
+let local_url =
+  (* The regexp is compiled once, when the closure is created. The dot of the
+     suffix is escaped so that only a real ".gz" extension is accepted
+     (an unescaped "." would match any character before "gz"). *)
+  let rex = Pcre.regexp ("^(" ^ file_scheme_prefix ^ ")(.*)(\\.gz)$") in
+  fun s ->
+    try
+      (* Group 2 is the part between the scheme prefix and the suffix. *)
+      Some ((Pcre.extract ~rex s).(2))
+    with Not_found -> None
+
let bufsiz = 16384 (* for file system I/O *)
let tcp_bufsiz = 4096 (* for TCP I/O *)
let iter_file f = fold_file (fun line _ -> f line) ()
+(* Size, in bytes, of the buffer used by iter_file_data; also the maximum
+   length of each chunk it hands to its callback. *)
+let iter_buf_size = 10240
+
+(* [iter_file_data f fname] opens [fname] and applies [f] to its contents,
+   chunk by chunk and in order. Each chunk is a fresh string of at most
+   [iter_buf_size] bytes.
+   The input channel is closed on every exit path: at end of file, and also
+   when [f] or the read raises, in which case the exception is re-raised
+   after the channel has been closed (previously the channel leaked).
+   As before, an [End_of_file] raised by [f] stops the iteration silently. *)
+let iter_file_data f fname =
+  let ic = open_in fname in
+  let buf = String.create iter_buf_size in
+  try
+    while true do
+      let bytes = input ic buf 0 iter_buf_size in
+      (* [input] returns 0 only at end of file: use it to leave the loop. *)
+      if bytes = 0 then raise End_of_file;
+      f (String.sub buf 0 bytes)
+    done
+  with
+  | End_of_file -> close_in ic
+  | exn ->
+      (* Do not leak the channel when the callback or the read fails. *)
+      close_in ic;
+      raise exn
+
let hashtbl_sorted_fold f tbl init =
let sorted_keys =
List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
begin
try
let ic = Gzip.open_in_chan zic in
- Http_getter_logger.log (sprintf "LUCA: OK" );
let oc = open_out output in
let buf = String.create bufsiz in
(try