X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.pl.in;h=43c7ff0790ec3750ce8858c6a54e972071a269c9;hb=32ca7ef634a861ddca8e63491d8fada852d6cabf;hp=f0392de2caa9acd9cbafb1b4805ba6f71e28e1c3;hpb=d71338dee9f65981e827bca3b4d6d79c0197b4d9;p=helm.git
diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in
index f0392de2c..43c7ff079 100755
--- a/helm/http_getter/http_getter.pl.in
+++ b/helm/http_getter/http_getter.pl.in
@@ -27,12 +27,21 @@
use Env;
my $HELM_LIB_DIR = $ENV{"HELM_LIB_DIR"};
# this should be the only fixed constant
-my $DEFAULT_HELM_LIB_DIR = "@DEFAULT_HELM_LIB_DIR@";
+my $DEFAULT_HELM_LIB_DIR = "@HELM_LIB_DIR@";
if (defined ($HELM_LIB_DIR)) {
$HELM_LIB_PATH = $HELM_LIB_DIR."/configuration.pl";
} else {
$HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl";
}
+
+# : TODO temporary, move this setting to configuration file
+# set the cache mode, may be "gzipped" or "normal"
+my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped';
+if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) {
+ die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be 'normal' or 'gzipped'\n";
+}
+#
+
# next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm
require $HELM_LIB_PATH;
@@ -53,6 +62,8 @@ print "Please contact me at: url, ">\n";
print "helm_dir: $helm_dir\n";
print "dtd_dir: $dtd_dir\n";
print "urls_of_uris.db: $uris_dbm.db\n";
+print "cache mode: $cachemode\n";
+
$SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes
$SIG{USR1} = \&update; # sent by the child to make the parent update
while (my $c = $d->accept) {
@@ -208,7 +219,7 @@ EOT
print "DTD: $inputuri ==> ($filename)\n";
if (stat($filename)) {
print "Using local copy\n";
- open(FD, $filename);
+ open(FD, $filename) or die "Cannot open $filename\n";
$cont = "";
while() { $cont .= $_; }
close(FD);
@@ -276,39 +287,70 @@ sub callback
$cont .= $data;
}
+sub gunzip { # gunzip a file and return the deflated content
+ my ($filename) = @_;
+
+ my ($gz, $buffer, $cont);
+
+ print "deflating $filename ...\n";
+ $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno";
+ $cont = "";
+ while ( $gz->gzread($buffer) > 0 ) {
+ $cont .= $buffer;
+ }
+ die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ;
+ $gz->gzclose();
+
+ return $cont;
+}
+
+sub gzip { # gzip the content argument and save it to filename argument
+ my ($cont, $filename) = @_;
+
+ my ($gz, $cont);
+
+ $gz = gzopen($filename, "w") or die "Cannot gzopen for writing file $filename: $gzerrno";
+ $gz->gzwrite($cont) or die "error writing: $gzerrno\n" ;
+ $gz->gzclose();
+}
+
sub download
{
my ($remove_headers,$str,$url,$filename) = @_;
#
my ($gz, $buffer);
- my $mode; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz)
+ my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz)
if ($filename =~ /\.xml$/) { # set retrieve mode
- $mode = "normal";
+ $resourcetype = "normal";
} elsif ($filename =~ /\.xml\.gz$/) {
- $mode = "gzipped";
+ $resourcetype = "gzipped";
} else {
die "Unsupported download extension, might be '.gz' or '.xml'\n";
}
+ my $basefname = $filename;
+ $basefname =~ s/\.gz$//; # get base resource name removing trailing .gz
#
$cont = ""; # modified by side-effect by the callback function
- if (stat($filename)) { # we already have local copy of requested file
+
+ my $localfname="";
+ if (stat($basefname)) {
+ $localfname=$basefname;
+ } elsif (stat($basefname.".gz")) {
+ $localfname=$basefname.".gz";
+ }
+ if ($localfname ne "") { # we already have local copy of requested file
+ # check both possible cache entry: gzipped or normal
print "Using local copy for the $str file\n";
#
- if ($mode eq "gzipped") { # deflating cached file
- print "deflating local file ...\n";
- $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno";
- while ( $gz->gzread($buffer) > 0 ) {
- $cont .= $buffer;
- }
- die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ;
- $gz->gzclose();
- } elsif ($mode eq "normal") { # return cached file
- open(FD, $filename);
+ if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it
+ $cont = gunzip($localfname);
+ } elsif ($localfname =~ /\.xml$/) { # just return cached file
+ open(FD, $localfname) or die "Cannot open $localfname";
while() { $cont .= $_; }
close(FD);
} else { # error
- die "Internal error: unexpected mode: $mode, might be 'normal' or 'gzipped'";
+ die "Internal error: unexpected file name $localfname, must end with '.gz' or '.xml.gz'\n";
}
#
} else { # download file from net
@@ -317,23 +359,38 @@ sub download
$request = HTTP::Request->new(GET => "$url");
$response = $ua->request($request, \&callback);
- print "Storing the $str file\n";
- mkdirs($filename);
- open(FD, ">".$filename);
- print FD $cont;
- close(FD);
+ # cache retrieved file to disk
+# TODO: inefficent, I haven't yet undestood how to deflate in memory gzipped file,
+# without call "gzopen"
#
- if ($mode eq "gzipped") { # deflate gzipped retrieved file
- print "deflating just retrieved file ...\n";
- $cont = ""; # reset $cont, cause $cont actually contain gzipped data
- $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno";
- while ( $gz->gzread($buffer) > 0 ) {
- $cont .= $buffer;
- }
- die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ;
- $gz->gzclose();
- # now $cont contain deflated, clear text data
+ print "Storing the $str file\n";
+ mkdirs($filename);
+ open(FD, ">".$filename.".tmp") or die "Cannot open $filename.tmp\n";
+ print FD $cont;
+ close(FD);
+
+ # handle cache conversion normal->gzipped or gzipped->normal as user choice
+ if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { # cache the file as is
+ rename "$filename.tmp", $filename;
+ } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'gzipped')) { # cache the file as is
+ # and update the $cont variabile with deflated content
+ rename "$filename.tmp", $filename;
+ $cont = gunzip($filename);
+ } elsif (($cachemode eq 'normal') and ($resourcetype eq 'gzipped')) { # deflate cache entry
+ # and update $cont
+ open(FD, "> $basefname") or die "cannot open $basefname\n";
+ $cont = gunzip($filename.".tmp");
+ print FD $cont;
+ close(FD);
+ unlink "$filename.tmp"; # delete old gzipped file
+ } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'normal')) { # compress cache entry
+ gzip($cont, $basefname.".gz");
+ unlink "$filename.tmp"; # delete old uncompressed file
+ } else {
+ die "Internal error, unsopported cachemode, resourcetype couple\n";
}
+ # $cont now contained uncompressed data
+
#
}
if ($remove_headers) {