clients, uwobo (java implementation, not yet tested with the OCaml one)
starts looping sending output to one of the client *)
-open Http_getter_common;;
-open Http_getter_debugger;;
-open Http_getter_misc;;
-open Http_getter_types;;
-open Printf;;
+open Http_getter_common
+open Http_getter_misc
+open Http_getter_types
+open Printf
(* expose ThreadSafe.threadSafe methods *)
class threadSafe =
method virtual doReader : 'a. 'a lazy_t -> 'a
method virtual doWriter : 'a. 'a lazy_t -> 'a
end
-;;
-let threadSafe = new threadSafe ;;
+
+let threadSafe = new threadSafe
+
+let finally cleanup f =
+ try
+ let res = Lazy.force f in
+ cleanup ();
+ res
+ with e ->
+ cleanup ();
+ raise (Http_getter_types.Cache_failure (Printexc.to_string e))
let resource_type_of_url = function
- | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped
- | url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal
+ | url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> `Gzipped
+ | url when Pcre.pmatch ~pat:"\\.xml$" url -> `Normal
| url -> raise (Invalid_URL url)
+
let extension_of_resource_type = function
- | Enc_normal -> "xml"
- | Enc_gzipped -> "xml.gz"
+ | `Normal -> "xml"
+ | `Gzipped -> "xml.gz"
(* basename = resource name without trailing ".gz", if any *)
let is_in_cache basename =
Sys.file_exists
(match Lazy.force Http_getter_env.cache_mode with
- | Enc_normal -> basename
- | Enc_gzipped -> basename ^ ".gz")
+ | `Normal -> basename
+ | `Gzipped -> basename ^ ".gz")
-let respond_xml ?(enc = Enc_normal) ?(patch = true) ~url ~uri outchan =
+let respond_xml
+ ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url ~uri outchan
+ =
let resource_type = resource_type_of_url url in
let extension = extension_of_resource_type resource_type in
let downloadname =
sprintf "%s%s.%s" (Lazy.force Http_getter_env.cic_dir) baseuri extension
| Nuprl_uri baseuri ->
(* assumption: baseuri starts with "/" *)
- sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri extension
+ sprintf "%s%s.%s" (Lazy.force Http_getter_env.nuprl_dir) baseuri
+ extension
| Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
| Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
let escaped_prefix =
(Lazy.force Http_getter_env.rdf_dir) escaped_prefix baseuri extension
in
let patch_fun =
- if patch then Http_getter_common.patch_xml else (fun x -> x)
+ let xmlbases =
+ if Http_getter_common.is_theory_uri uri then
+ Some (Filename.dirname uri, Filename.dirname url)
+ else
+ None
+ in
+ if patch then
+ Http_getter_common.patch_xml ?xmlbases ~via_http ()
+ else
+ (fun x -> x)
in
let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
let contype = "text/xml" in
let fill_cache () =
threadSafe#doWriter (lazy(
if not (is_in_cache basename) then begin (* cache MISS *)
- debug_print "Cache MISS :-(";
+ Http_getter_logger.log ~level:2 "Cache MISS :-(";
mkdir ~parents:true (Filename.dirname downloadname);
match (resource_type, Lazy.force Http_getter_env.cache_mode) with
- | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
+ | `Normal, `Normal | `Gzipped, `Gzipped ->
wget ~output:downloadname url;
None
- | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *)
+ | `Normal, `Gzipped -> (* resource normal, cache gzipped *)
let tmp = tempfile () in
- wget ~output:tmp url;
- gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
- if enc = Enc_normal then (* user wants normal: don't delete it! *)
- Some (tmp, enc)
- else begin
- Sys.remove tmp;
- None
- end
- | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *)
+ let (res, cleanup) =
+ if enc = `Normal then (* user wants normal: don't delete it! *)
+ (Some (tmp, enc), (fun () -> ()))
+ else
+ (None, (fun () -> Sys.remove tmp))
+ in
+ finally cleanup (lazy (
+ wget ~output:tmp url;
+ gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
+ res
+ ));
+ | `Gzipped, `Normal -> (* resource gzipped, cache normal *)
let tmp = tempfile () in
- wget ~output:tmp url;
- gunzip ~output:basename ~keep:true tmp; (* fill cache *)
- if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *)
- Some (tmp, enc)
- else begin
- Sys.remove tmp;
- None
- end
+ let (res, cleanup) =
+ if enc = `Gzipped then (* user wants .gz: don't delete it! *)
+ (Some (tmp, enc), (fun () -> ()))
+ else
+ (None, (fun () -> Sys.remove tmp))
+ in
+ finally cleanup (lazy (
+ wget ~output:tmp url;
+ gunzip ~output:basename ~keep:true tmp; (* fill cache *)
+ res
+ ));
end else begin
- debug_print "Cache HIT :-)";
+ Http_getter_logger.log ~level:2 "Cache HIT :-)";
None
end
)) in
threadSafe#doReader (lazy(
assert (is_in_cache basename);
match (enc, Lazy.force Http_getter_env.cache_mode) with
- | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
+ | `Normal, `Normal | `Gzipped, `Gzipped ->
(* resource in cache is already in the required format *)
(match enc with
- | Enc_normal ->
- debug_print "No format mangling required (encoding = normal)";
- return_file ~fname:basename ~contype ~patch_fun outchan
- | Enc_gzipped ->
- debug_print "No format mangling required (encoding = gzipped)";
+ | `Normal ->
+ Http_getter_logger.log ~level:2
+ "No format mangling required (encoding = normal)";
+ return_file ~via_http ~fname:basename ~contype ~patch_fun outchan
+ | `Gzipped ->
+ Http_getter_logger.log ~level:2
+ "No format mangling required (encoding = gzipped)";
return_file
- ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
+ ~via_http ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
~patch_fun ~gunzip:true
outchan)
- | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal ->
+ | `Normal, `Gzipped | `Gzipped, `Normal ->
(match tmp_short_circuit with
| None -> (* no short circuit possible, use cache *)
- debug_print "No short circuit available, use cache";
+ Http_getter_logger.log ~level:2
+ "No short circuit available, use cache";
let tmp = tempfile () in
- (match enc with
- | Enc_normal ->
- (* required format is normal, cached version is gzipped *)
- gunzip (* gunzip to tmp *)
- ~output:tmp ~keep:true (basename ^ ".gz");
- return_file ~fname:tmp ~contype ~patch_fun outchan;
- | Enc_gzipped ->
- (* required format is gzipped, cached version is normal *)
- gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
- return_file
- ~fname:tmp ~contype ~contenc:"x-gzip"
- ~patch_fun ~gunzip:true
- outchan);
- Sys.remove tmp
- | Some (fname, Enc_normal) -> (* short circuit available, use it! *)
- debug_print "Using short circuit (encoding = normal)";
- return_file ~fname ~contype ~patch_fun outchan;
- Sys.remove fname
- | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *)
- debug_print "Using short circuit (encoding = gzipped)";
- return_file
- ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan;
- Sys.remove fname)
+ finally (fun () -> Sys.remove tmp) (lazy (
+ (match enc with
+ | `Normal ->
+ (* required format is normal, cached version is gzipped *)
+ gunzip (* gunzip to tmp *)
+ ~output:tmp ~keep:true (basename ^ ".gz");
+ return_file ~via_http ~fname:tmp ~contype ~patch_fun outchan;
+ | `Gzipped ->
+ (* required format is gzipped, cached version is normal *)
+ gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
+ return_file
+ ~via_http ~fname:tmp ~contype ~contenc:"x-gzip"
+ ~patch_fun ~gunzip:true
+ outchan)
+ ))
+ | Some (fname, `Normal) -> (* short circuit available, use it! *)
+ Http_getter_logger.log ~level:2
+ "Using short circuit (encoding = normal)";
+ finally (fun () -> Sys.remove fname) (lazy (
+ return_file ~via_http ~fname ~contype ~patch_fun outchan
+ ))
+ | Some (fname, `Gzipped) -> (* short circuit available, use it! *)
+ Http_getter_logger.log ~level:2
+ "Using short circuit (encoding = gzipped)";
+ finally (fun () -> Sys.remove fname) (lazy (
+ return_file ~via_http ~fname ~contype ~contenc:"x-gzip" ~patch_fun
+ ~gunzip:true outchan
+ )))
))
-;;
(* TODO enc is not yet supported *)
-let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan =
+let respond_xsl
+ ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
+ =
let patch_fun =
- if patch then Http_getter_common.patch_xsl else (fun x -> x)
+ if patch then Http_getter_common.patch_xsl ~via_http () else (fun x -> x)
in
let fname = tempfile () in
- wget ~output:fname url;
- return_file ~fname ~contype:"text/xml" ~patch_fun outchan;
- Sys.remove fname
-;;
+ finally (fun () -> Sys.remove fname) (lazy (
+ wget ~output:fname url;
+ return_file ~via_http ~fname ~contype:"text/xml" ~patch_fun outchan
+ ))
(* TODO enc is not yet supported *)
-let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan =
+let respond_dtd
+ ?(via_http = true) ?(enc = `Normal) ?(patch = true) ~url outchan
+ =
let patch_fun =
- if patch then Http_getter_common.patch_dtd else (fun x -> x)
+ if patch then Http_getter_common.patch_dtd ~via_http () else (fun x -> x)
in
if Sys.file_exists url then
(* TODO check this: old getter here used text/xml *)
- return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan
+ return_file ~via_http ~fname:url ~contype:"text/plain" ~patch_fun outchan
else
- return_html_error ("Can't find DTD: " ^ url) outchan
-;;
+ raise (Dtd_not_found url)
let clean () =
let module E = Http_getter_env in
List.iter
(function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*")))
[ Lazy.force E.cic_dir; Lazy.force E.nuprl_dir; Lazy.force E.rdf_dir ]
-;;
+