include ../Makefile.common
-test: getter.cma test.ml
- $(OCAMLC) -linkpkg -o $@ $^
-
logger `BR
let getxml_remote ~format ~patch_dtd uri =
- ClientHTTP.get_and_save_to_tmp
- (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s"
+ let uri =
+ sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s"
(getter_url ()) uri
- (match format with `Normal -> "normal" | `Gzipped -> "gzipped")
- (match patch_dtd with true -> "yes" | false -> "no"))
+ (match format with `Normal -> "normal" | `Gzipped -> "gz")
+ (match patch_dtd with true -> "yes" | false -> "no")
+ in
+ ClientHTTP.get_and_save_to_tmp uri
(* API *)
else
update_from_all_servers logger ()
-let getxml ?(format = `Normal) ?(patch_dtd = true) uri =
+let getxml ?(format = `Gzipped) ?(patch_dtd = false) uri =
if remote () then
getxml_remote ~format ~patch_dtd uri
else begin
val unregister: string -> unit
val update: ?logger:logger_callback -> unit -> unit
+
+ (** @param format defaults to `Gzipped
+ * @param patch_dtd defaults to false *)
val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string
val getxslt : ?patch_dtd:bool -> string -> string
val getdtd : ?patch_dtd:bool -> string -> string
(** {2 Shorthands} *)
-val getxml' : UriManager.uri -> string
+val getxml' : UriManager.uri -> string (* `Gzipped format, no DTD patch *)
val resolve' : UriManager.uri -> string
val register' : UriManager.uri -> string -> unit
val unregister' : UriManager.uri -> unit
None
in
if patch then
- Http_getter_common.patch_xml ?xmlbases ~via_http ()
+ Some (Http_getter_common.patch_xml ?xmlbases ~via_http ())
else
- (fun x -> x)
+ None
in
let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
let contype = "text/xml" in
| `Normal ->
Http_getter_logger.log ~level:2
"No format mangling required (encoding = normal)";
- return_file ~via_http ~fname:basename ~contype ~patch_fun outchan
+ return_file ~via_http ~fname:basename ~contype ?patch_fun ~enc
+ outchan
| `Gzipped ->
Http_getter_logger.log ~level:2
"No format mangling required (encoding = gzipped)";
return_file
~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
+ ?patch_fun ~gunzip:true ~enc
outchan)
| `Normal, `Gzipped | `Gzipped, `Normal ->
(match tmp_short_circuit with
(* required format normal, cached (or local) version gzipped *)
gunzip (* gunzip to tmp *)
~output:tmp ~keep:true (basename ^ ".gz");
- return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan;
+ return_file ~via_http ~fname:tmp ~contype ?patch_fun ~enc outchan
| `Gzipped ->
(* required format is gzipped, cached version is normal *)
gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
return_file
~via_http ~fname:tmp ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
+ ?patch_fun ~gunzip:true ~enc
outchan)
))
| Some (fname, `Normal) -> (* short circuit available, use it! *)
Http_getter_logger.log ~level:2
"Using short circuit (encoding = normal)";
finally (fun () -> Sys.remove fname) (lazy (
- return_file ~via_http ~fname ~contype ~patch_fun outchan
+ return_file ~via_http ~fname ~contype ?patch_fun ~enc outchan
))
| Some (fname, `Gzipped) -> (* short circuit available, use it! *)
Http_getter_logger.log ~level:2
"Using short circuit (encoding = gzipped)";
finally (fun () -> Sys.remove fname) (lazy (
- return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun
- ~gunzip:true outchan
+ return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ?patch_fun
+ ~gunzip:true ~enc outchan
)))
))
let fname = tempfile () in
finally (fun () -> Sys.remove fname) (lazy (
wget ~output:fname url;
- return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan
+ return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan
))
(* TODO enc is not yet supported *)
in
if Sys.file_exists url then
(* TODO check this: old getter here used text/xml *)
- return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan
+ return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun ~enc
+ outchan
else
raise (Dtd_not_found url)
(patch_doctype ?via_http () (patch_entity ?via_http () line))
let return_file
- ~fname ?contype ?contenc
- ?(patch_fun = fun x -> x) ?(gunzip = false) ?(via_http = true) outchan
- =
+ ~fname ?contype ?contenc ?patch_fun ?(gunzip = false) ?(via_http = true)
+ ~enc outchan
+=
let headers =
match (contype, contenc) with
| (Some t, Some e) -> ["Content-Encoding", e; "Content-Type", t]
Http_daemon.send_headers headers outchan;
Http_daemon.send_CRLF outchan
end;
- if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to
- it, compress the result and sent it to client *)
- let (tmp1, tmp2) =
- (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ())
- in
- try
- Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname;(* gunzip to tmp1 *)
- let new_file = open_out tmp2 in
- Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *)
- (fun line ->
- output_string new_file (patch_fun line ^ "\n");
- flush outchan)
- tmp1;
- close_out new_file;
- Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *)
- Http_getter_misc.iter_file (* send tmp1 to client as is*)
- (fun line -> output_string outchan (line ^ "\n"); flush outchan)
- tmp1;
- Sys.remove tmp1 (* rm tmp1 *)
- with e ->
- Sys.remove tmp1;
- raise e
- end else begin (* no need to gunzip, apply patch_fun directly to file *)
- Http_getter_misc.iter_file
- (fun line -> output_string outchan (patch_fun line ^ "\n"); flush outchan)
- fname;
- end
+ match gunzip, patch_fun with
+ | true, Some patch_fun ->
+ Http_getter_logger.log ~level:2
+ "Patch required, uncompress/compress cycle needed :-(";
+ (* gunzip needed, uncompress file, apply patch_fun to it, compress the
+ * result and sent it to client *)
+ let (tmp1, tmp2) =
+ (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ())
+ in
+ (try
+ Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname; (* gunzip tmp1 *)
+ let new_file = open_out tmp2 in
+ Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *)
+ (fun line ->
+ output_string new_file (patch_fun line ^ "\n");
+ flush outchan)
+ tmp1;
+ close_out new_file;
+ Http_getter_misc.gzip ~output:tmp1 tmp2;(* tmp1 = gzip(tmp2); rm tmp2 *)
+ Http_getter_misc.iter_file (* send tmp1 to client as is*)
+ (fun line -> output_string outchan (line ^ "\n"); flush outchan)
+ tmp1;
+ Sys.remove tmp1 (* rm tmp1 *)
+ with e ->
+ Sys.remove tmp1;
+ raise e)
+ | false, Some patch_fun ->
+ (match enc with
+ | `Normal ->
+ Http_getter_misc.iter_file
+ (fun line -> output_string outchan (patch_fun (line ^ "\n")))
+ fname
+ | `Gzipped -> assert false)
+ (* dangerous case, if this happens it needs to be investigated *)
+ | _, None -> Http_getter_misc.iter_file_data (output_string outchan) fname
;;
@param contenc Content-Enconding header value
@param patch_fun function used to patch file contents
@param gunzip is meaningful only if a patch function is provided. If gunzip
- is true patch_fun is applied to the uncompressed version of the file. The file
- is then compressed again and send to client
+ is true and patch_fun is given (i.e. is not None), then patch_fun is applied
+ to the uncompressed version of the file. The file is then compressed again and
+ send to client
@param via_http (default: true) if true http specific communications are used
(e.g. headers, crlf before body) and sent via outchan, otherwise they're not.
Set it to false when saving to a local file
fname:string ->
?contype:string -> ?contenc:string ->
?patch_fun:(string -> string) -> ?gunzip:bool -> ?via_http:bool ->
+ enc:encoding ->
out_channel ->
unit
let iter_file f = fold_file (fun line _ -> f line) ()
+let iter_buf_size = 10240
+
+let iter_file_data f fname =
+ let ic = open_in fname in
+ let buf = String.create iter_buf_size in
+ try
+ while true do
+ let bytes = input ic buf 0 iter_buf_size in
+ if bytes = 0 then raise End_of_file;
+ f (String.sub buf 0 bytes)
+ done
+ with End_of_file -> close_in ic
+
let hashtbl_sorted_fold f tbl init =
let sorted_keys =
List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl [])
(** "fold_left" like function on file lines, trailing newline is not passed to
the given function *)
val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a
+
(* "iter" like function on file lines, trailing newline is not passed to the
given function *)
val iter_file : (string -> unit) -> string -> unit
+ (* "iter" like function on file data chunks of fixed size *)
+val iter_file_data: (string -> unit) -> string -> unit
+
(** like Hashtbl.fold but keys are processed ordered *)
val hashtbl_sorted_fold :
('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c