From 07b29a457f5f16e1aa85afbe057242a01c9303f1 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 30 Jan 2001 09:59:38 +0000 Subject: [PATCH] Added mixed cache support through HTTP_GETTER_CACHE_MODE environment variable --- helm/http_getter/http_getter.pl.in | 117 +++++++++++++++++++++-------- 1 file changed, 87 insertions(+), 30 deletions(-) diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index bee3315e7..674757642 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -33,6 +33,15 @@ if (defined ($HELM_LIB_DIR)) { } else { $HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl"; } + +# : TODO temporary, move this setting to configuration file +# set the cache mode, may be "gzipped" or "normal" +my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped'; +if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { + die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be 'normal' or 'gzipped'\n"; +} +# + # next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm require $HELM_LIB_PATH; @@ -53,6 +62,8 @@ print "Please contact me at: url, ">\n"; print "helm_dir: $helm_dir\n"; print "dtd_dir: $dtd_dir\n"; print "urls_of_uris.db: $uris_dbm.db\n"; +print "cache mode: $cachemode\n"; + $SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes $SIG{USR1} = \&update; # sent by the child to make the parent update while (my $c = $d->accept) { @@ -276,39 +287,70 @@ sub callback $cont .= $data; } +sub gunzip { # gunzip a file and return the deflated content + my ($filename) = @_; + + my ($gz, $buffer, $cont); + + print "deflating $filename ...\n"; + $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; + $cont = ""; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + + return $cont; +} + +sub gzip { # gzip the content argument and save it to filename argument + my ($cont, $filename) = @_; + + my ($gz, $cont); + + $gz = gzopen($filename, "w") or die "Cannot gzopen for writing file $filename: $gzerrno"; + $gz->gzwrite($cont) or die "error writing: $gzerrno\n" ; + $gz->gzclose(); +} + sub download { my ($remove_headers,$str,$url,$filename) = @_; # my ($gz, $buffer); - my $mode; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) + my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) if ($filename =~ /\.xml$/) { # set retrieve mode - $mode = "normal"; + $resourcetype = "normal"; } elsif ($filename =~ /\.xml\.gz$/) { - $mode = "gzipped"; + $resourcetype = "gzipped"; } else { die "Unsupported download extension, might be '.gz' or '.xml'\n"; } + my $basefname = $filename; + $basefname =~ s/\.gz$//; # get base resource name removing trailing .gz # $cont = ""; # modified by side-effect by the callback function - if (stat($filename)) { # we already have local copy of requested file + + my $localfname=""; + if (stat($basefname)) { + $localfname=$basefname; + } elsif (stat($basefname.".gz")) { + $localfname=$basefname.".gz"; + } + if ($localfname ne "") { # we already have local copy of requested file + # check both possible cache entry: gzipped or normal print "Using local copy for the $str file\n"; # - if ($mode eq "gzipped") { # deflating cached file - print "deflating local file ...\n"; - $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; - while ( $gz->gzread($buffer) > 0 ) { - $cont .= $buffer; - } - die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; - $gz->gzclose(); - } elsif ($mode eq "normal") { # return cached file - open(FD, $filename); + if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it + $cont = gunzip($localfname); + } elsif ($localfname =~ /\.xml$/) { # just return cached file + open(FD, $localfname); while() { $cont .= $_; } close(FD); } else { # error - die "Internal error: unexpected mode: $mode, might be 'normal' or 'gzipped'"; + die "Internal error: unexpected file name $localfname, must end with '.gz' or '.xml.gz'\n"; } # } else { # download file from net @@ -317,23 +359,38 @@ sub download $request = HTTP::Request->new(GET => "$url"); $response = $ua->request($request, \&callback); - print "Storing the $str file\n"; - mkdirs($filename); - open(FD, ">".$filename); - print FD $cont; - close(FD); + # cache retrieved file to disk +# TODO: inefficent, I haven't yet undestood how to deflate in memory gzipped file, +# without call "gzopen" # - if ($mode eq "gzipped") { # deflate gzipped retrieved file - print "deflating just retrieved file ...\n"; - $cont = ""; # reset $cont, cause $cont actually contain gzipped data - $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; - while ( $gz->gzread($buffer) > 0 ) { - $cont .= $buffer; - } - die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; - $gz->gzclose(); - # now $cont contain deflated, clear text data + print "Storing the $str file\n"; + mkdirs($filename); + open(FD, ">".$filename.".tmp"); + print FD $cont; + close(FD); + + # handle cache conversion normal->gzipped or gzipped->normal as user choice + if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { # cache the file as is + rename "$filename.tmp", $filename; + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'gzipped')) { # cache the file as is + # and update the $cont variabile with deflated content + rename "$filename.tmp", $filename; + $cont = gunzip($filename); + } elsif (($cachemode eq 'normal') and ($resourcetype eq 'gzipped')) { # deflate cache entry + # and update $cont + open(FD, "> $basefname"); + $cont = gunzip($filename.".tmp"); + print FD $cont; + close(FD); + unlink "$filename.tmp"; # delete old gzipped file + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'normal')) { # compress cache entry + gzip($cont, $basefname.".gz"); + unlink "$filename.tmp"; # delete old uncompressed file + } else { + die "Internal error, unsopported cachemode, resourcetype couple\n"; } + # $cont now contained uncompressed data + # } if ($remove_headers) { -- 2.39.2