(*
- * Copyright (C) 2003:
+ * Copyright (C) 2003-2004:
* Stefano Zacchiroli <zack@cs.unibo.it>
* for the HELM Team http://helm.cs.unibo.it/
*
*)
(* TODO cache expires control!!! *)
+(* TODO uwobo loop:
+ if two large proof (already in cache) are requested at the same time by two
+ clients, uwobo (java implementation, not yet tested with the OCaml one)
+ starts looping sending output to one of the client *)
open Http_getter_common;;
+open Http_getter_debugger;;
open Http_getter_misc;;
open Http_getter_types;;
open Printf;;
+ (* expose ThreadSafe.threadSafe methods *)
+class threadSafe =
+ object
+ inherit ThreadSafe.threadSafe
+ method virtual doCritical : 'a. 'a lazy_t -> 'a
+ method virtual doReader : 'a. 'a lazy_t -> 'a
+ method virtual doWriter : 'a. 'a lazy_t -> 'a
+ end
+;;
+let threadSafe = new threadSafe ;;
+
let resource_type_of_url = function
| url when Pcre.pmatch ~pat:"\\.xml\\.gz$" url -> Enc_gzipped
| url when Pcre.pmatch ~pat:"\\.xml$" url -> Enc_normal
- | url -> raise (Http_getter_invalid_URL url)
+ | url -> raise (Invalid_URL url)
let extension_of_resource_type = function
| Enc_normal -> "xml"
| Enc_gzipped -> "xml.gz"
let resource_type = resource_type_of_url url in
let extension = extension_of_resource_type resource_type in
let downloadname =
- match http_getter_uri_of_string uri with (* parse uri *)
- | Xml_uri (Cic baseuri) | Xml_uri (Theory baseuri) ->
+ match uri_of_string uri with (* parse uri *)
+ | Cic_uri (Cic baseuri) | Cic_uri (Theory baseuri) ->
+ (* assumption: baseuri starts with "/" *)
+ sprintf "%s%s.%s" Http_getter_env.cic_dir baseuri extension
+ | Nuprl_uri baseuri ->
(* assumption: baseuri starts with "/" *)
- sprintf "%s%s.%s" Http_getter_env.xml_dir baseuri extension
+ sprintf "%s%s.%s" Http_getter_env.nuprl_dir baseuri extension
| Rdf_uri (prefix, ((Cic baseuri) as qbaseuri))
| Rdf_uri (prefix, ((Theory baseuri) as qbaseuri)) ->
let escaped_prefix =
if patch then Http_getter_common.patch_xml else (fun x -> x)
in
let basename = Pcre.replace ~pat:"\\.gz$" downloadname in
- if not (is_in_cache basename) then begin (* download and fill cache *)
- mkdir ~parents:true (Filename.dirname downloadname);
- wget ~output:downloadname url;
- match (resource_type, Http_getter_env.cache_mode) with
- | Enc_normal, Enc_normal ->
- (if enc = Enc_gzipped then gzip ~keep:true downloadname)
- | Enc_gzipped, Enc_gzipped ->
- (if enc = Enc_normal then gunzip ~keep:true downloadname)
- | Enc_normal, Enc_gzipped -> gzip ~keep:(enc = Enc_normal) downloadname
- | Enc_gzipped, Enc_normal -> gunzip ~keep:(enc = Enc_gzipped) downloadname
- end else begin (* resource already in cache *)
+ let contype = "text/xml" in
+ (* Fill cache if needed and return a short circuit file.
+ Short circuit is needed in situations like:
+ resource type = normal, cache type = gzipped, required encoding = normal
+ we would like to avoid normal -> gzipped -> normal conversions. To avoid
+ this tmp_short_circuit is used to remember the name of the intermediate
+ file name *)
+ let fill_cache () =
+ threadSafe#doWriter (lazy(
+ if not (is_in_cache basename) then begin (* cache MISS *)
+ debug_print "Cache MISS :-(";
+ mkdir ~parents:true (Filename.dirname downloadname);
+ match (resource_type, Http_getter_env.cache_mode) with
+ | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
+ wget ~output:downloadname url;
+ None
+ | Enc_normal, Enc_gzipped -> (* resource normal, cache gzipped *)
+ let tmp = tempfile () in
+ wget ~output:tmp url;
+ gzip ~output:(basename ^ ".gz") ~keep:true tmp; (* fill cache *)
+ if enc = Enc_normal then (* user wants normal: don't delete it! *)
+ Some (tmp, enc)
+ else begin
+ Sys.remove tmp;
+ None
+ end
+ | Enc_gzipped, Enc_normal -> (* resource gzipped, cache normal *)
+ let tmp = tempfile () in
+ wget ~output:tmp url;
+ gunzip ~output:basename ~keep:true tmp; (* fill cache *)
+ if enc = Enc_gzipped then (* user wants gzipped: don't delete it! *)
+ Some (tmp, enc)
+ else begin
+ Sys.remove tmp;
+ None
+ end
+ end else begin
+ debug_print "Cache HIT :-)";
+ None
+ end
+ )) in
+ let tmp_short_circuit = fill_cache () in
+ threadSafe#doReader (lazy(
+ assert (is_in_cache basename);
match (enc, Http_getter_env.cache_mode) with
- | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> ()
- | Enc_normal, Enc_gzipped -> gunzip ~keep:true (basename ^ ".gz")
- | Enc_gzipped, Enc_normal -> gzip ~keep:true basename
- end; (* now resource is in cache *)
- (* invariant: file to be sent back to client is available on disk in the
- format the client likes *)
- (match enc with (* send file to client *)
- | Enc_normal ->
- return_file ~fname:basename ~contype:"text/xml" ~patch_fun outchan
- | Enc_gzipped ->
- return_file
- ~fname:(basename ^ ".gz") ~contype:"text/xml" ~contenc:"x-gzip"
- ~patch_fun outchan);
- match (enc, Http_getter_env.cache_mode) with (* remove temp files *)
- | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped -> ()
- | Enc_normal, Enc_gzipped -> Sys.remove basename
- | Enc_gzipped, Enc_normal -> Sys.remove (basename ^ ".gz")
+ | Enc_normal, Enc_normal | Enc_gzipped, Enc_gzipped ->
+ (* resource in cache is already in the required format *)
+ (match enc with
+ | Enc_normal ->
+ debug_print "No format mangling required (encoding = normal)";
+ return_file ~fname:basename ~contype ~patch_fun outchan
+ | Enc_gzipped ->
+ debug_print "No format mangling required (encoding = gzipped)";
+ return_file
+ ~fname:(basename ^ ".gz") ~contype ~contenc:"x-gzip"
+ ~patch_fun ~gunzip:true
+ outchan)
+ | Enc_normal, Enc_gzipped | Enc_gzipped, Enc_normal ->
+ (match tmp_short_circuit with
+ | None -> (* no short circuit possible, use cache *)
+ debug_print "No short circuit available, use cache";
+ let tmp = tempfile () in
+ (match enc with
+ | Enc_normal ->
+ (* required format is normal, cached version is gzipped *)
+ gunzip (* gunzip to tmp *)
+ ~output:tmp ~keep:true (basename ^ ".gz");
+ return_file ~fname:tmp ~contype ~patch_fun outchan;
+ | Enc_gzipped ->
+ (* required format is gzipped, cached version is normal *)
+ gzip ~output:tmp ~keep:true basename; (* gzip to tmp *)
+ return_file
+ ~fname:tmp ~contype ~contenc:"x-gzip"
+ ~patch_fun ~gunzip:true
+ outchan);
+ Sys.remove tmp
+ | Some (fname, Enc_normal) -> (* short circuit available, use it! *)
+ debug_print "Using short circuit (encoding = normal)";
+ return_file ~fname ~contype ~patch_fun outchan;
+ Sys.remove fname
+ | Some (fname, Enc_gzipped) -> (* short circuit available, use it! *)
+ debug_print "Using short circuit (encoding = gzipped)";
+ return_file
+ ~fname ~contype ~contenc:"x-gzip" ~patch_fun ~gunzip:true outchan;
+ Sys.remove fname)
+ ))
+;;
(* TODO enc is not yet supported *)
let respond_xsl ?(enc = Enc_normal) ?(patch = true) ~url outchan =
wget ~output:fname url;
return_file ~fname ~contype:"text/xml" ~patch_fun outchan;
Sys.remove fname
+;;
(* TODO enc is not yet supported *)
let respond_dtd ?(enc = Enc_normal) ?(patch = true) ~url outchan =
return_file ~fname:url ~contype:"text/plain" ~patch_fun outchan
else
return_html_error ("Can't find DTD: " ^ url) outchan
+;;
+let clean () =
+ let module E = Http_getter_env in
+ List.iter
+ (function dir -> ignore (Unix.system ("rm -rf " ^ dir ^ "/*"))
+ ) [E.cic_dir ; E.nuprl_dir ; E.rdf_dir ]
+;;