open Printf
+let file_scheme_prefix = "file://"
+
let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
-let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
+let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] ("^" ^ file_scheme_prefix)
let dir_sep_RE = Pcre.regexp "/"
let heading_slash_RE = Pcre.regexp "^/"
-let is_local_url s = Pcre.pmatch ~rex:file_scheme_RE s
+let local_url =
+ let rex = Pcre.regexp ("^(" ^ file_scheme_prefix ^ ")(.*)(.gz)$") in
+ fun s ->
+ try
+ Some ((Pcre.extract ~rex s).(2))
+ with Not_found -> None
let bufsiz = 16384 (* for file system I/O *)
let tcp_bufsiz = 4096 (* for TCP I/O *)
let iter_file f = fold_file (fun line _ -> f line) ()
+let iter_buf_size = 10240
+
+let iter_file_data f fname =
+ let ic = open_in fname in
+ let buf = String.create iter_buf_size in
+ try
+ while true do
+ let bytes = input ic buf 0 iter_buf_size in
+ if bytes = 0 then raise End_of_file;
+ f (String.sub buf 0 bytes)
+ done
+ with End_of_file -> close_in ic
+
let hashtbl_sorted_fold f tbl init =
let sorted_keys =
List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
begin
try
let ic = Gzip.open_in_chan zic in
- Http_getter_logger.log (sprintf "LUCA: OK" );
let oc = open_out output in
let buf = String.create bufsiz in
(try
fun line ->
Pcre.pmatch ~rex:blank_line_RE line
+let normalize_dir s = (* append "/" if missing *)
+ let len = String.length s in
+ try
+ if s.[len - 1] = '/' then s
+ else s ^ "/"
+ with Invalid_argument _ -> (* string is empty *) "/"
+
+let strip_trailing_slash s =
+ try
+ let len = String.length s in
+ if s.[len - 1] = '/' then String.sub s 0 (len - 1)
+ else s
+ with Invalid_argument _ -> s
+
+let strip_suffix ~suffix s =
+ try
+ let s_len = String.length s in
+ let suffix_len = String.length suffix in
+ let suffix_sub = String.sub s (s_len - suffix_len) suffix_len in
+ if suffix_sub <> suffix then raise (Invalid_argument "");
+ String.sub s 0 (s_len - suffix_len)
+ with Invalid_argument _ ->
+ raise (Invalid_argument "Http_getter_misc.strip_suffix")
+
+let rec list_uniq = function
+ | [] -> []
+ | h::[] -> [h]
+ | h1::h2::tl when h1 = h2 -> list_uniq (h2 :: tl)
+ | h1::tl (* when h1 <> h2 *) -> h1 :: list_uniq tl
+
+let extension s =
+ try
+ let idx = String.rindex s '.' in
+ String.sub s idx (String.length s - idx)
+ with Not_found -> ""
+
+let temp_file_of_uri uri =
+ let flat_string s s' c =
+ let cs = String.copy s in
+ for i = 0 to (String.length s) - 1 do
+ if String.contains s' s.[i] then cs.[i] <- c
+ done;
+ cs
+ in
+ let user = try Unix.getlogin () with _ -> "" in
+ Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') ""
+
+let backtick cmd =
+ let ic = Unix.open_process_in cmd in
+ let res = input_line ic in
+ ignore (Unix.close_process_in ic);
+ res
+