open Printf
+let file_scheme_prefix = "file://" (* scheme prefix of local-file URLs; spliced into file_scheme_RE and local_url *)
+
let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$" (* group 1: word chars, dots, dashes; group 3: digits after an optional colon; group 4: optional rest starting with a slash -- presumably host, port, path *)
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://" (* leading http scheme, matched case-insensitively *)
-let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
+let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] ("^" ^ file_scheme_prefix) (* leading file_scheme_prefix, matched case-insensitively *)
let dir_sep_RE = Pcre.regexp "/" (* a slash anywhere in the subject *)
let heading_slash_RE = Pcre.regexp "^/" (* a slash at the very start of the subject *)
+(* [local_url s] returns [Some path] when [s] starts with
+   [file_scheme_prefix] and ends with a literal dot followed by gz; [path]
+   is the text between the prefix and that suffix (the suffix itself is not
+   included).  Returns [None] when [s] does not match.  Unlike
+   [file_scheme_RE], the prefix is matched case-sensitively (no CASELESS
+   flag is given). *)
+let local_url =
+  (* The dot is escaped, as in [trailing_dot_gz_RE]: an unescaped dot would
+     accept any character in front of gz, not only a real dot. *)
+  let rex = Pcre.regexp ("^(" ^ file_scheme_prefix ^ ")(.*)(\\.gz)$") in
+  fun s ->
+    try
+      Some ((Pcre.extract ~rex s).(2))
+    with Not_found -> None
+
let bufsiz = 16384 (* for file system I/O *)
let tcp_bufsiz = 4096 (* for TCP I/O *)
let iter_file f = fold_file (fun line _ -> f line) () (* runs f on each item fold_file passes in, with a unit accumulator; NOTE(review): fold_file is defined outside this view -- presumably it folds over the lines of a file *)
+let iter_buf_size = 10240 (* chunk size, in bytes, read per iteration by iter_file_data *)
+
+(* [iter_file_data f fname] opens [fname] and calls [f] on successive chunks
+   of its contents, each at most [iter_buf_size] bytes long, until [input]
+   reports end of file by returning 0.
+   The channel is closed on every exit path.  An [End_of_file] raised by [f]
+   ends the iteration quietly (unchanged behaviour); any other exception
+   raised by [f] or by reading is re-raised after the channel is closed. *)
+let iter_file_data f fname =
+  let ic = open_in fname in
+  let buf = String.create iter_buf_size in
+  try
+    while true do
+      let bytes = input ic buf 0 iter_buf_size in
+      (* [input] returns 0 only at end of file; End_of_file is raised here
+         purely to leave the infinite loop. *)
+      if bytes = 0 then raise End_of_file;
+      f (String.sub buf 0 bytes)
+    done
+  with
+  | End_of_file -> close_in ic
+  | e ->
+      (* Do not leak the descriptor when [f] or [input] fails; the _noerr
+         variant keeps a close failure from masking the original error. *)
+      close_in_noerr ic;
+      raise e
+
let hashtbl_sorted_fold f tbl init =
let sorted_keys =
List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
let sorted_keys =
List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
in
- List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys
+ List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys
+(* [cp src dst] copies the contents of file [src] to file [dst] in chunks of
+   [bufsiz] bytes.
+   A [Sys_error] raised while opening, reading, writing or closing is logged
+   through [Http_getter_logger.log] and not propagated; any other exception
+   is logged once and re-raised.  Both channels are closed on every exit
+   path. *)
let cp src dst =
- let (ic, oc) = (open_in src, open_out dst) in
- let buf = String.create bufsiz in
- (try
- while true do
- let bytes = input ic buf 0 bufsiz in
- if bytes = 0 then raise End_of_file else output oc buf 0 bytes
- done
- with End_of_file -> ());
- close_in ic; close_out oc
+  try
+    let ic = open_in src in
+    (try
+      let oc = open_out dst in
+      (try
+        let buf = String.create bufsiz in
+        let rec copy () =
+          (* [input] returns 0 only at end of file *)
+          let bytes = input ic buf 0 bufsiz in
+          if bytes > 0 then begin
+            output oc buf 0 bytes;
+            copy ()
+          end
+        in
+        copy ();
+        close_out oc
+      with e ->
+        (* Release the output descriptor on any failure, including a failed
+           flush inside [close_out]; errors of this second close are
+           ignored so the original exception is the one reported. *)
+        close_out_noerr oc;
+        raise e);
+      close_in ic
+    with e ->
+      close_in_noerr ic;
+      raise e)
+  with
+  (* Single reporting point: each failure is logged exactly once. *)
+  | Sys_error s -> Http_getter_logger.log s
+  | e ->
+      Http_getter_logger.log (Printexc.to_string e);
+      raise e
let wget ?output url =
Http_getter_logger.log
begin
try
let ic = Gzip.open_in_chan zic in
- Http_getter_logger.log (sprintf "LUCA: OK" );
let oc = open_out output in
let buf = String.create bufsiz in
(try
if bytes = 0 then raise End_of_file else Pervasives.output oc buf 0 bytes
done
with End_of_file -> ());
- close_out oc
+ close_out oc;
+ Gzip.close_in ic
with
e -> close_in zic ; raise e
end ;
- close_in zic ;
if not keep then Sys.remove fname
;;