(*
- * Copyright (C) 2003:
+ * Copyright (C) 2003-2004:
* Stefano Zacchiroli <zack@cs.unibo.it>
* for the HELM Team http://helm.cs.unibo.it/
*
* http://helm.cs.unibo.it/
*)
-open Http_getter_debugger;;
-open Printf;;
+open Printf
+
+open Http_getter_debugger
let trailing_dot_gz_RE = Pcre.regexp "\\.gz$" (* for g{,un}zip *)
-let url_RE = Pcre.regexp "^([\\w.]+)(:(\\d+))?(/.*)?$"
+let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
let file_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^file://"
let dir_sep_RE = Pcre.regexp "/"
let tcp_bufsiz = 4096 (* for TCP I/O *)
let fold_file f init fname =
- let inchan = open_in fname in
- let rec fold_lines' value =
- try
- let line = input_line inchan in
- fold_lines' (f value line)
- with End_of_file -> value
+ let ic = open_in fname in
+ let rec aux acc =
+ let line = try Some (input_line ic) with End_of_file -> None in
+ match line with
+ | None -> acc
+ | Some line -> aux (f line acc)
in
- let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in
- close_in inchan;
+ let res = try aux init with e -> close_in ic; raise e in
+ close_in ic;
res
-let iter_file f = fold_file (fun _ line -> f line) ()
+
+let iter_file f = fold_file (fun line _ -> f line) ()
let hashtbl_sorted_fold f tbl init =
let sorted_keys =
in
List.fold_left (fun acc k -> f k (Hashtbl.find tbl k) acc) init sorted_keys
+let hashtbl_sorted_iter f tbl =
+ let sorted_keys =
+ List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
+ in
+ List.iter (fun k -> f k (Hashtbl.find tbl k)) sorted_keys
+
let cp src dst =
let (ic, oc) = (open_in src, open_out dst) in
let buf = String.create bufsiz in
let (address, port, path) = parse_url url in
let buf = String.create tcp_bufsiz in
let (inchan, outchan) = init_socket address port in
- output_string outchan (sprintf "GET %s HTTP/1.0\r\n\r\n" path);
+ output_string outchan (sprintf "GET %s\r\n" path);
flush outchan;
(try
while true do
close_in inchan (* close also outchan, same fd *)
let wget ?output url =
+ debug_print
+ (sprintf "wgetting %s (output: %s)" url
+ (match output with None -> "default" | Some f -> f));
match url with
| url when Pcre.pmatch ~rex:file_scheme_RE url -> (* file:// *)
(let src_fname = Pcre.replace ~rex:file_scheme_RE url in
| scheme -> (* unsupported scheme *)
failwith ("Http_getter_misc.wget: unsupported scheme: " ^ scheme)
-let gzip ?(keep = false) fname =
- debug_print (sprintf "gzipping %s (keep: %b)" fname keep);
- let (ic, oc) = (open_in fname, Gzip.open_out (fname ^ ".gz")) in
+let gzip ?(keep = false) ?output fname =
+ let output = match output with None -> fname ^ ".gz" | Some fname -> fname in
+ debug_print (sprintf "gzipping %s (keep: %b, output: %s)" fname keep output);
+ let (ic, oc) = (open_in fname, Gzip.open_out output) in
let buf = String.create bufsiz in
(try
while true do
with End_of_file -> ());
close_in ic; Gzip.close_out oc;
if not keep then Sys.remove fname
-
-let gunzip ?(keep = false) fname =
- debug_print (sprintf "gunzipping %s (keep: %b)" fname keep);
- let basename = Pcre.replace ~rex:trailing_dot_gz_RE fname in
- assert (basename <> fname);
- let (ic, oc) = (Gzip.open_in fname, open_out basename) in
+;;
+
+let gunzip ?(keep = false) ?output fname =
+ (* assumption: given file name ends with ".gz" or output is set *)
+ let output =
+ match output with
+ | None ->
+ if (Pcre.pmatch ~rex:trailing_dot_gz_RE fname) then
+ Pcre.replace ~rex:trailing_dot_gz_RE fname
+ else
+ failwith
+ "Http_getter_misc.gunzip: unable to determine output file name"
+ | Some fname -> fname
+ in
+ debug_print (sprintf "gunzipping %s (keep: %b, output: %s)"
+ fname keep output);
+ let (ic, oc) = (Gzip.open_in fname, open_out output) in
let buf = String.create bufsiz in
(try
while true do
let bytes = Gzip.input ic buf 0 bufsiz in
- if bytes = 0 then raise End_of_file else output oc buf 0 bytes
+ if bytes = 0 then raise End_of_file else Pervasives.output oc buf 0 bytes
done
with End_of_file -> ());
Gzip.close_in ic; close_out oc;
if not keep then Sys.remove fname
+;;
let tempfile () = Filename.temp_file "http_getter_" ""
close_in ic;
Some buf
with Unix.Unix_error (Unix.ENOENT, "stat", _) -> None
- end else (* other URL, pass it to netclient *)
+ end else (* other URL, pass it to Http_client *)
try
- Some (Http_client.Convenience.http_get url)
- with Http_client.Http_error (code, _) -> None
+ Some (Http_client.http_get url)
+ with e ->
+ prerr_endline (sprintf
+ "Warning: Http_client failed on url %s with exception: %s"
+ url (Printexc.to_string e));
+ None
+
+let is_blank_line =
+ let blank_line_RE = Pcre.regexp "(^#)|(^\\s*$)" in
+ fun line ->
+ Pcre.pmatch ~rex:blank_line_RE line