open Printf

(** Matches a leading ["http://"] scheme prefix, case-insensitively. *)
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"

(** Matches [host[:port][/path]].  Capture groups: 1 = host, 2 = [":port"],
    3 = port digits, 4 = path (including the leading slash). *)
let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"

let tcp_bufsiz = 4096 (* for TCP I/O *)

(** [parse_url url] splits [url] into [(host, port, path)].

    A leading ["http://"] is stripped before matching.  The port defaults to
    [80] when absent and the path defaults to ["/"] when absent.

    @raise Failure if [url] does not match the expected shape or the port
    cannot be converted to an integer; the message embeds the underlying
    exception. *)
let parse_url url =
  try
    let stripped = Pcre.replace ~rex:http_scheme_RE url in
    let subs = Pcre.extract ~rex:url_RE stripped in
    let host = subs.(1) in
    (* Group 2 is the optional ":port" part; group 3 holds only the digits. *)
    let port = if subs.(2) = "" then 80 else int_of_string subs.(3) in
    let path = if subs.(4) = "" then "/" else subs.(4) in
    (host, port, path)
  with exc ->
    failwith
      (sprintf "Can't parse url: %s (exception: %s)" url
         (Printexc.to_string exc))

(** [init_socket addr port] resolves [addr] with [Unix.gethostbyname], opens a
    TCP socket connected to the first returned address on [port], and returns
    [(inchan, outchan)], both backed by the same file descriptor.

    If connecting fails, the socket is closed before the exception is
    re-raised, so no descriptor is leaked.

    Exceptions from host lookup, socket creation and connection propagate to
    the caller. *)
let init_socket addr port =
  let host_entry = Unix.gethostbyname addr in
  let inet_addr = host_entry.Unix.h_addr_list.(0) in
  let sockaddr = Unix.ADDR_INET (inet_addr, port) in
  let sock = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
  (try Unix.connect sock sockaddr
   with exc ->
     (* Release the descriptor; a failure to close must not mask [exc]. *)
     (try Unix.close sock with Unix.Unix_error _ -> ());
     raise exc);
  let outchan = Unix.out_channel_of_descr sock in
  let inchan = Unix.in_channel_of_descr sock in
  (inchan, outchan)

(** [http_get_iter_buf ~callback url] sends an HTTP/1.0 [GET] request for
    [url] and feeds the raw response to [callback], one chunk of at most
    [tcp_bufsiz] bytes at a time, until the peer closes the connection.

    Nothing is parsed: chunks are passed on exactly as read from the socket.
    Each chunk is a fresh string, so [callback] may safely retain it.

    The connection is closed both on normal completion and when [callback]
    or the I/O raises.  As before, an [End_of_file] raised while reading
    (including from [callback]) simply ends the iteration.

    @raise Failure if [url] cannot be parsed (see {!parse_url}). *)
let http_get_iter_buf ~callback url =
  let (address, port, path) = parse_url url in
  (* [input] fills a mutable [bytes] buffer; it is reused for every read. *)
  let buf = Bytes.create tcp_bufsiz in
  let (inchan, outchan) = init_socket address port in
  let rec pump () =
    match input inchan buf 0 tcp_bufsiz with
    | 0 -> () (* [input] returns 0 only at end of stream *)
    | nread ->
        (* Copy out exactly the bytes read so the callback never observes
           the shared buffer being overwritten by the next read. *)
        callback (Bytes.sub_string buf 0 nread);
        pump ()
  in
  (try
     output_string outchan (sprintf "GET %s HTTP/1.0\r\n\r\n" path);
     flush outchan;
     (try pump () with End_of_file -> ())
   with exc ->
     (* Do not leak the descriptor when the request or the callback fails. *)
     close_in_noerr inchan;
     raise exc);
  close_in inchan (* close also outchan, same fd *)

(** [http_get url] performs the request via {!http_get_iter_buf} and returns
    everything received on the connection as a single string. *)
let http_get url =
  let buf = Buffer.create 10240 in
  http_get_iter_buf ~callback:(Buffer.add_string buf) url;
  Buffer.contents buf