(* Case-insensitive regexp matching a leading "http://" scheme prefix. *)
4 let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
(* Regexp for a URL without its scheme, anchored at both ends.
   Capture groups:
     1 = host (word characters, dots and hyphens),
     2 = optional ":port" suffix,
     3 = the port digits inside group 2,
     4 = optional path starting with "/". *)
5 let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
(* Size in bytes of the read buffer: used both to allocate the buffer and as
   the maximum length requested by each read from the socket channel. *)
7 let tcp_bufsiz = 4096 (* for TCP I/O *)
(* NOTE(review): fragment -- the header of the enclosing definition and the
   lines between the ones below are not visible in this excerpt.  Presumably
   this is the body of parse_url, whose result a caller destructures as
   (address, port, path); confirm against the full file. *)
(* Match url_RE against url after passing it through Pcre.replace with
   http_scheme_RE.  No explicit template is given, so this presumably strips
   the scheme -- confirm against the default template of Pcre.replace. *)
12 Pcre.extract ~rex:url_RE (Pcre.replace ~rex:http_scheme_RE url)
(* Port: 80 when group 2 (the optional ":port" part) is the empty string,
   otherwise group 3 (the digits) converted with int_of_string. *)
15 (if subs.(2) = "" then 80 else int_of_string subs.(3)),
(* Path: "/" when group 4 is the empty string, otherwise group 4 as matched. *)
16 (if subs.(4) = "" then "/" else subs.(4)))
(* Error text naming the offending url and the printed form of the caught
   exception; how this message is raised or reported is not visible here. *)
19 (sprintf "Can't parse url: %s (exception: %s)"
20 url (Printexc.to_string exc))
(* Open a TCP connection to host addr on the given port and wrap the socket
   in a pair of buffered channels.

   The result expression is not visible in this excerpt; a caller
   destructures the result as (inchan, outchan).

   NOTE(review): Unix.gethostbyname is documented to raise Not_found for an
   unknown host, and indexing h_addr_list at 0 fails if the resolver returned
   no addresses.
   NOTE(review): no visible code closes the descriptor if Unix.connect
   raises, so the socket appears to leak on a failed connection. *)
22 let init_socket addr port =
(* Resolve the host name and keep only the first address returned. *)
23 let inet_addr = (Unix.gethostbyname addr).Unix.h_addr_list.(0) in
24 let sockaddr = Unix.ADDR_INET (inet_addr, port) in
(* Stream socket in the PF_INET domain with the default protocol (0). *)
25 let suck = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
26 Unix.connect suck sockaddr;
(* Both channels are built on the same file descriptor. *)
27 let outchan = Unix.out_channel_of_descr suck in
28 let inchan = Unix.in_channel_of_descr suck in
(* Issue an HTTP/1.0 GET request for url and feed the data read back from
   the server to callback, chunk by chunk (at most tcp_bufsiz bytes each).

   Several lines of this definition (the loop header, the body of the
   full-buffer branch and the body of the fallback branch) are not visible
   in this excerpt.

   NOTE(review): no header parsing is visible here, so callback presumably
   receives the raw response including status line and headers -- confirm.
   NOTE(review): the request carries no "Host" header; only the path is sent.
   NOTE(review): String.create plus in-place reads into a string is the
   pre-safe-string idiom; current OCaml uses Bytes.create and
   Bytes.sub_string for this.
   NOTE(review): no visible cleanup closes the channel if callback raises
   something other than End_of_file. *)
31 let http_get_iter_buf ~callback url =
32 let (address, port, path) = parse_url url in
(* Single buffer reused for every read. *)
33 let buf = String.create tcp_bufsiz in
34 let (inchan, outchan) = init_socket address port in
(* Minimal request: request line followed by the blank line ending headers. *)
35 output_string outchan (sprintf "GET %s HTTP/1.0\r\n\r\n" path);
(* Read up to tcp_bufsiz bytes; the result is the number of bytes read. *)
39 match input inchan buf 0 tcp_bufsiz with
(* A zero-length read means the peer closed the stream: signal it with
   End_of_file, which is caught below. *)
40 | 0 -> raise End_of_file
41 | bytes when bytes = tcp_bufsiz -> (* buffer full, no need to slice it *)
43 | bytes when bytes < tcp_bufsiz -> (* buffer not full, slice it *)
(* Hand over a fresh copy containing only the bytes actually read. *)
44 callback (String.sub buf 0 bytes)
45 | _ -> (* ( bytes < 0 ) || ( bytes > tcp_bufsiz ) *)
(* End_of_file is the normal termination signal and is swallowed here. *)
48 with End_of_file -> ());
49 close_in inchan (* close also outchan, same fd *)
(* NOTE(review): fragment -- the header of the enclosing definition and its
   result expression are not visible in this excerpt. *)
(* Accumulator with an initial capacity of 10240 bytes. *)
52 let buf = Buffer.create 10240 in
(* Append every chunk delivered by http_get_iter_buf to the accumulator. *)
53 http_get_iter_buf ~callback:(Buffer.add_string buf) url;