From 52fdcda3e0083391fa04a064f3e07279d975d5ba Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 10 May 2005 10:59:13 +0000 Subject: [PATCH] - changes defaults of getxml (format gzipped, don't patch dtd) - big bug fix which avoid spurious \n at end of gzipped file --- helm/ocaml/getter/Makefile | 3 -- helm/ocaml/getter/http_getter.ml | 12 +++-- helm/ocaml/getter/http_getter.mli | 5 +- helm/ocaml/getter/http_getter_cache.ml | 24 +++++---- helm/ocaml/getter/http_getter_common.ml | 68 +++++++++++++----------- helm/ocaml/getter/http_getter_common.mli | 6 ++- helm/ocaml/getter/http_getter_misc.ml | 13 +++++ helm/ocaml/getter/http_getter_misc.mli | 4 ++ 8 files changed, 83 insertions(+), 52 deletions(-) diff --git a/helm/ocaml/getter/Makefile b/helm/ocaml/getter/Makefile index 6827e2bb4..820b1628e 100644 --- a/helm/ocaml/getter/Makefile +++ b/helm/ocaml/getter/Makefile @@ -24,6 +24,3 @@ IMPLEMENTATION_FILES = \ include ../Makefile.common -test: getter.cma test.ml - $(OCAMLC) -linkpkg -o $@ $^ - diff --git a/helm/ocaml/getter/http_getter.ml b/helm/ocaml/getter/http_getter.ml index 4f80006c6..e0a1a4658 100644 --- a/helm/ocaml/getter/http_getter.ml +++ b/helm/ocaml/getter/http_getter.ml @@ -291,11 +291,13 @@ let update_remote logger () = logger `BR let getxml_remote ~format ~patch_dtd uri = - ClientHTTP.get_and_save_to_tmp - (sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" + let uri = + sprintf "%sgetxml?uri=%s&format=%s&patch_dtd=%s" (getter_url ()) uri - (match format with `Normal -> "normal" | `Gzipped -> "gzipped") - (match patch_dtd with true -> "yes" | false -> "no")) + (match format with `Normal -> "normal" | `Gzipped -> "gz") + (match patch_dtd with true -> "yes" | false -> "no") + in + ClientHTTP.get_and_save_to_tmp uri (* API *) @@ -335,7 +337,7 @@ let update ?(logger = fun _ -> ()) () = else update_from_all_servers logger () -let getxml ?(format = `Normal) ?(patch_dtd = true) uri = +let getxml ?(format = `Gzipped) ?(patch_dtd = false) uri = if remote () then getxml_remote ~format ~patch_dtd uri else begin diff --git a/helm/ocaml/getter/http_getter.mli b/helm/ocaml/getter/http_getter.mli index b58c3dcf4..0b86e730d 100644 --- a/helm/ocaml/getter/http_getter.mli +++ b/helm/ocaml/getter/http_getter.mli @@ -49,6 +49,9 @@ val register: uri:string -> url:string -> unit val unregister: string -> unit val update: ?logger:logger_callback -> unit -> unit + + (** @param format defaults to `Gzipped + * @param patch_dtd defaults to false *) val getxml : ?format:encoding -> ?patch_dtd:bool -> string -> string val getxslt : ?patch_dtd:bool -> string -> string val getdtd : ?patch_dtd:bool -> string -> string @@ -64,7 +67,7 @@ val ls: string -> ls_item list (** {2 Shorthands} *) -val getxml' : UriManager.uri -> string +val getxml' : UriManager.uri -> string (* `Gzipped format, no DTD patch *) val resolve' : UriManager.uri -> string val register' : UriManager.uri -> string -> unit val unregister' : UriManager.uri -> unit diff --git a/helm/ocaml/getter/http_getter_cache.ml b/helm/ocaml/getter/http_getter_cache.ml index 62a009c0f..e6627fa1e 100644 --- a/helm/ocaml/getter/http_getter_cache.ml +++ b/helm/ocaml/getter/http_getter_cache.ml @@ -110,9 +110,9 @@ let respond_xml None in if patch then - Http_getter_common.patch_xml ?xmlbases ~via_http () + Some (Http_getter_common.patch_xml ?xmlbases ~via_http ()) else - (fun x -> x) + None in let basename = Pcre.replace ~pat:"\\.gz$" downloadname in let contype = "text/xml" in @@ -182,13 +182,14 @@ let respond_xml | `Normal -> Http_getter_logger.log ~level:2 "No format mangling required (encoding = normal)"; - return_file ~via_http ~fname:basename ~contype ~patch_fun outchan + return_file ~via_http ~fname:basename ~contype ?patch_fun ~enc + outchan | `Gzipped -> Http_getter_logger.log ~level:2 "No format mangling required (encoding = gzipped)"; return_file ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip" - ~patch_fun ~gunzip:true + ?patch_fun ~gunzip:true ~enc outchan) | `Normal, `Gzipped | `Gzipped, `Normal -> (match tmp_short_circuit with @@ -202,27 +203,27 @@ let respond_xml (* required format normal, cached (or local) version gzipped *) gunzip (* gunzip to tmp *) ~output:tmp ~keep:true (basename ^ ".gz"); - return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan; + return_file ~via_http ~fname:tmp ~contype ?patch_fun ~enc outchan | `Gzipped -> (* required format is gzipped, cached version is normal *) gzip ~output:tmp ~keep:true basename; (* gzip to tmp *) return_file ~via_http ~fname:tmp ~contype ~contenc:"x-gzip" - ~patch_fun ~gunzip:true + ?patch_fun ~gunzip:true ~enc outchan) )) | Some (fname, `Normal) -> (* short circuit available, use it! *) Http_getter_logger.log ~level:2 "Using short circuit (encoding = normal)"; finally (fun () -> Sys.remove fname) (lazy ( - return_file ~via_http ~fname ~contype ~patch_fun outchan + return_file ~via_http ~fname ~contype ?patch_fun ~enc outchan )) | Some (fname, `Gzipped) -> (* short circuit available, use it! *) Http_getter_logger.log ~level:2 "Using short circuit (encoding = gzipped)"; finally (fun () -> Sys.remove fname) (lazy ( - return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun - ~gunzip:true outchan + return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ?patch_fun + ~gunzip:true ~enc outchan ))) )) @@ -236,7 +237,7 @@ let respond_xsl let fname = tempfile () in finally (fun () -> Sys.remove fname) (lazy ( wget ~output:fname url; - return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan + return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun ~enc outchan )) (* TODO enc is not yet supported *) @@ -248,7 +249,8 @@ let respond_dtd in if Sys.file_exists url then (* TODO check this: old getter here used text/xml *) - return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan + return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun ~enc + outchan else raise (Dtd_not_found url) diff --git a/helm/ocaml/getter/http_getter_common.ml b/helm/ocaml/getter/http_getter_common.ml index 15810b93c..c85f680c2 100644 --- a/helm/ocaml/getter/http_getter_common.ml +++ b/helm/ocaml/getter/http_getter_common.ml @@ -112,9 +112,9 @@ let patch_xml ?via_http ?xmlbases () = (patch_doctype ?via_http () (patch_entity ?via_http () line)) let return_file - ~fname ?contype ?contenc - ?(patch_fun = fun x -> x) ?(gunzip = false) ?(via_http = true) outchan - = + ~fname ?contype ?contenc ?patch_fun ?(gunzip = false) ?(via_http = true) + ~enc outchan += let headers = match (contype, contenc) with | (Some t, Some e) -> ["Content-Encoding", e; "Content-Type", t] @@ -127,32 +127,40 @@ let return_file Http_daemon.send_headers headers outchan; Http_daemon.send_CRLF outchan end; - if gunzip then begin (* gunzip needed, uncompress file, apply patch_fun to - it, compress the result and sent it to client *) - let (tmp1, tmp2) = - (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ()) - in - try - Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname;(* gunzip to tmp1 *) - let new_file = open_out tmp2 in - Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *) - (fun line -> - output_string new_file (patch_fun line ^ "\n"); - flush outchan) - tmp1; - close_out new_file; - Http_getter_misc.gzip ~output:tmp1 tmp2; (* tmp1 = gzip(tmp2); rm tmp2 *) - Http_getter_misc.iter_file (* send tmp1 to client as is*) - (fun line -> output_string outchan (line ^ "\n"); flush outchan) - tmp1; - Sys.remove tmp1 (* rm tmp1 *) - with e -> - Sys.remove tmp1; - raise e - end else begin (* no need to gunzip, apply patch_fun directly to file *) - Http_getter_misc.iter_file - (fun line -> output_string outchan (patch_fun line ^ "\n"); flush outchan) - fname; - end + match gunzip, patch_fun with + | true, Some patch_fun -> + Http_getter_logger.log ~level:2 + "Patch required, uncompress/compress cycle needed :-("; + (* gunzip needed, uncompress file, apply patch_fun to it, compress the + * result and sent it to client *) + let (tmp1, tmp2) = + (Http_getter_misc.tempfile (), Http_getter_misc.tempfile ()) + in + (try + Http_getter_misc.gunzip ~keep:true ~output:tmp1 fname; (* gunzip tmp1 *) + let new_file = open_out tmp2 in + Http_getter_misc.iter_file (* tmp2 = patch(tmp1) *) + (fun line -> + output_string new_file (patch_fun line ^ "\n"); + flush outchan) + tmp1; + close_out new_file; + Http_getter_misc.gzip ~output:tmp1 tmp2;(* tmp1 = gzip(tmp2); rm tmp2 *) + Http_getter_misc.iter_file (* send tmp1 to client as is*) + (fun line -> output_string outchan (line ^ "\n"); flush outchan) + tmp1; + Sys.remove tmp1 (* rm tmp1 *) + with e -> + Sys.remove tmp1; + raise e) + | false, Some patch_fun -> + (match enc with + | `Normal -> + Http_getter_misc.iter_file + (fun line -> output_string outchan (patch_fun (line ^ "\n"))) + fname + | `Gzipped -> assert false) + (* dangerous case, if this happens it needs to be investigated *) + | _, None -> Http_getter_misc.iter_file_data (output_string outchan) fname ;; diff --git a/helm/ocaml/getter/http_getter_common.mli b/helm/ocaml/getter/http_getter_common.mli index 0aec9bc18..e94e39314 100644 --- a/helm/ocaml/getter/http_getter_common.mli +++ b/helm/ocaml/getter/http_getter_common.mli @@ -52,8 +52,9 @@ val patch_xsl : ?via_http:bool -> unit -> string -> string @param contenc Content-Enconding header value @param patch_fun function used to patch file contents @param gunzip is meaningful only if a patch function is provided. If gunzip - is true patch_fun is applied to the uncompressed version of the file. The file - is then compressed again and send to client + is true and patch_fun is given (i.e. is not None), then patch_fun is applied + to the uncompressed version of the file. The file is then compressed again and + send to client @param via_http (default: true) if true http specific communications are used (e.g. headers, crlf before body) and sent via outchan, otherwise they're not. Set it to false when saving to a local file @@ -62,6 +63,7 @@ val return_file: fname:string -> ?contype:string -> ?contenc:string -> ?patch_fun:(string -> string) -> ?gunzip:bool -> ?via_http:bool -> + enc:encoding -> out_channel -> unit diff --git a/helm/ocaml/getter/http_getter_misc.ml b/helm/ocaml/getter/http_getter_misc.ml index b25a42580..c1f21512e 100644 --- a/helm/ocaml/getter/http_getter_misc.ml +++ b/helm/ocaml/getter/http_getter_misc.ml @@ -61,6 +61,19 @@ let fold_file f init fname = let iter_file f = fold_file (fun line _ -> f line) () +let iter_buf_size = 10240 + +let iter_file_data f fname = + let ic = open_in fname in + let buf = String.create iter_buf_size in + try + while true do + let bytes = input ic buf 0 iter_buf_size in + if bytes = 0 then raise End_of_file; + f (String.sub buf 0 bytes) + done + with End_of_file -> close_in ic + let hashtbl_sorted_fold f tbl init = let sorted_keys = List.sort compare (Hashtbl.fold (fun key _ keys -> key::keys) tbl []) diff --git a/helm/ocaml/getter/http_getter_misc.mli b/helm/ocaml/getter/http_getter_misc.mli index 39b40e3b5..bc2f72a31 100644 --- a/helm/ocaml/getter/http_getter_misc.mli +++ b/helm/ocaml/getter/http_getter_misc.mli @@ -40,10 +40,14 @@ val local_url: string -> string option (** "fold_left" like function on file lines, trailing newline is not passed to the given function *) val fold_file : (string -> 'a -> 'a) -> 'a -> string -> 'a + (* "iter" like function on file lines, trailing newline is not passed to the given function *) val iter_file : (string -> unit) -> string -> unit + (* "iter" like function on file data chunks of fixed size *) +val iter_file_data: (string -> unit) -> string -> unit + (** like Hashtbl.fold but keys are processed ordered *) val hashtbl_sorted_fold : ('a -> 'b -> 'c -> 'c) -> ('a, 'b) Hashtbl.t -> 'c -> 'c -- 2.39.2