} else {
$HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl";
}
+
+# <ZACK>: TODO temporary, move this setting to configuration file
+# set the cache mode, may be "gzipped" or "normal"
+my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped';
+if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) {
+ die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be 'normal' or 'gzipped'\n";
+}
+# </ZACK>
+
# next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm
require $HELM_LIB_PATH;
print "helm_dir: $helm_dir\n";
print "dtd_dir: $dtd_dir\n";
print "urls_of_uris.db: $uris_dbm.db\n";
+print "cache mode: $cachemode\n";
+
$SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes
$SIG{USR1} = \&update; # sent by the child to make the parent update
while (my $c = $d->accept) {
$cont .= $data;
}
+sub gunzip { # gunzip a file and return the deflated content
+ my ($filename) = @_;
+
+ my ($gz, $buffer, $cont);
+
+ print "deflating $filename ...\n";
+ $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno";
+ $cont = "";
+ while ( $gz->gzread($buffer) > 0 ) {
+ $cont .= $buffer;
+ }
+ die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ;
+ $gz->gzclose();
+
+ return $cont;
+}
+
+sub gzip { # gzip the content argument and save it to filename argument
+ my ($cont, $filename) = @_;
+
+ my ($gz, $cont);
+
+ $gz = gzopen($filename, "w") or die "Cannot gzopen for writing file $filename: $gzerrno";
+ $gz->gzwrite($cont) or die "error writing: $gzerrno\n" ;
+ $gz->gzclose();
+}
+
sub download
{
my ($remove_headers,$str,$url,$filename) = @_;
# <gzip>
my ($gz, $buffer);
- my $mode; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz)
+ my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz)
if ($filename =~ /\.xml$/) { # set retrieve mode
- $mode = "normal";
+ $resourcetype = "normal";
} elsif ($filename =~ /\.xml\.gz$/) {
- $mode = "gzipped";
+ $resourcetype = "gzipped";
} else {
die "Unsupported download extension, might be '.gz' or '.xml'\n";
}
+ my $basefname = $filename;
+ $basefname =~ s/\.gz$//; # get base resource name removing trailing .gz
# </gzip>
$cont = ""; # modified by side-effect by the callback function
- if (stat($filename)) { # we already have local copy of requested file
+
+ my $localfname="";
+ if (stat($basefname)) {
+ $localfname=$basefname;
+ } elsif (stat($basefname.".gz")) {
+ $localfname=$basefname.".gz";
+ }
+ if ($localfname ne "") { # we already have local copy of requested file
+ # check both possible cache entry: gzipped or normal
print "Using local copy for the $str file\n";
# <gzip>
- if ($mode eq "gzipped") { # deflating cached file
- print "deflating local file ...\n";
- $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno";
- while ( $gz->gzread($buffer) > 0 ) {
- $cont .= $buffer;
- }
- die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ;
- $gz->gzclose();
- } elsif ($mode eq "normal") { # return cached file
- open(FD, $filename);
+ if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it
+ $cont = gunzip($localfname);
+ } elsif ($localfname =~ /\.xml$/) { # just return cached file
+ open(FD, $localfname);
while(<FD>) { $cont .= $_; }
close(FD);
} else { # error
- die "Internal error: unexpected mode: $mode, might be 'normal' or 'gzipped'";
+ die "Internal error: unexpected file name $localfname, must end with '.gz' or '.xml.gz'\n";
}
# </gzip>
} else { # download file from net
$request = HTTP::Request->new(GET => "$url");
$response = $ua->request($request, \&callback);
- print "Storing the $str file\n";
- mkdirs($filename);
- open(FD, ">".$filename);
- print FD $cont;
- close(FD);
+ # cache retrieved file to disk
+# <ZACK/> TODO: inefficent, I haven't yet undestood how to deflate in memory gzipped file,
+# without call "gzopen"
# <gzip>
- if ($mode eq "gzipped") { # deflate gzipped retrieved file
- print "deflating just retrieved file ...\n";
- $cont = ""; # reset $cont, cause $cont actually contain gzipped data
- $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno";
- while ( $gz->gzread($buffer) > 0 ) {
- $cont .= $buffer;
- }
- die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ;
- $gz->gzclose();
- # now $cont contain deflated, clear text data
+ print "Storing the $str file\n";
+ mkdirs($filename);
+ open(FD, ">".$filename.".tmp");
+ print FD $cont;
+ close(FD);
+
+ # handle cache conversion normal->gzipped or gzipped->normal as user choice
+ if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { # cache the file as is
+ rename "$filename.tmp", $filename;
+ } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'gzipped')) { # cache the file as is
+ # and update the $cont variabile with deflated content
+ rename "$filename.tmp", $filename;
+ $cont = gunzip($filename);
+ } elsif (($cachemode eq 'normal') and ($resourcetype eq 'gzipped')) { # deflate cache entry
+ # and update $cont
+ open(FD, "> $basefname");
+ $cont = gunzip($filename.".tmp");
+ print FD $cont;
+ close(FD);
+ unlink "$filename.tmp"; # delete old gzipped file
+ } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'normal')) { # compress cache entry
+ gzip($cont, $basefname.".gz");
+ unlink "$filename.tmp"; # delete old uncompressed file
+ } else {
+ die "Internal error, unsopported cachemode, resourcetype couple\n";
}
+ # $cont now contained uncompressed data
+
# </gzip>
}
if ($remove_headers) {