From 45eeb63cbcf9807064a4d67acf8396a3e42ad744 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Mon, 15 Jan 2001 12:58:05 +0000 Subject: [PATCH] Added preliminary support for gzipped library --- helm/http_getter/AUTHORS | 1 + helm/http_getter/http_getter.pl.in | 110 +++++++++++++++++++++++++---- 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/helm/http_getter/AUTHORS b/helm/http_getter/AUTHORS index 1088c5689..72c65aaa0 100644 --- a/helm/http_getter/AUTHORS +++ b/helm/http_getter/AUTHORS @@ -1 +1,2 @@ Claudio Sacerdoti Coen +Stefano Zacchiroli diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index e8adb3bc0..9c68ae710 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -41,6 +41,7 @@ use HTTP::Status; use HTTP::Request; use LWP::UserAgent; use DB_File; +use Compress::Zlib; #CSC: mancano i controlli sulle condizioni di errore di molte funzioni #CSC: ==> non e' robusto @@ -72,9 +73,21 @@ while (my $c = $d->accept) { my $cicfilename = $cicuri; $cicfilename =~ s/cic:(.*)/$1/; $cicfilename =~ s/theory:(.*)/$1/; - $cicfilename = $helm_dir.$cicfilename.".xml"; - +# $cicfilename = $helm_dir.$cicfilename.".xml"; +# my $cicurl = $map{$cicuri}; + my $extension; + if ($cicurl =~ /\.xml$/) { # non gzipped file + $extension = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $extension = ".xml.gz"; + } else { # error: unknown extension + die "unexpected extension in url: $cicurl, might be '.xml' or '.xml.gz'"; + } + $cicfilename = $helm_dir.$cicfilename.$extension; + + #my $cicurl = $map{$cicuri}; +# if (!defined($cicurl)) { print "\nNOT FOUND!!!!!\n"; $c->send_error(RC_NOT_FOUND) @@ -114,13 +127,45 @@ while (my $c = $d->accept) { $cicfilename =~ s/theory:(.*)/$1/; $cicfilename = $helm_dir.$cicfilename; - my $typesfilename = $cicfilename.".types.xml" if $typesuri; - my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; - $cicfilename .= ".xml"; +# my $typesfilename = $cicfilename.".types.xml" if $typesuri; +# my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; +# $cicfilename .= ".xml"; +# my $cicurl = $map{$cicuri}; - my $typesurl = $map{$typesuri} if $typesuri; - my $annurl = $map{$annuri} if $annuri; + my $typesurl = $map{$typesuri} if (defined($typesuri)); + my $annurl = $map{$annuri} if (defined($annuri)); + my ($cicext, $typesext, $annext); + if ($cicurl =~ /\.xml$/) { # normal file + $cicext = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $cicext = ".xml.gz"; + } else { + die "unexpected extension in url: $cicurl; might be '.xml' or '.xml.gz'"; + } + if (defined($typesuri)) { # extension selection for types file + if ($typesurl =~ /\.xml$/) { # normal file + $typesext = ".types.xml"; + } elsif ($typesurl =~ /\.xml\.gz$/) { # gzipped file + $typesext = ".types.xml.gz"; + } else { + die "unexpected extension in url: $typesurl; might be '.xml' or '.xml.gz'"; + } + } + if (defined($annuri)) { # extension selection for annotation file + if ($annurl =~ /\.xml$/) { # normal file + $annext = ".xml"; + } elsif ($annurl =~ /\.xml\.gz$/) { # gzipped file + $annext = ".xml.gz"; + } else { + die "unexpected extension in url: $annurl might be '.xml' or '.xml.gz'"; + } + } + my $typesfilename = $cicfilename.$typesext if $typesuri; + my $annfilename = $cicfilename.$annsuffix.$annext if $annuri; + $cicfilename .= $cicext; +# + if (!defined($cicurl) || (!defined($typesurl) && $typesuri) || @@ -234,14 +279,40 @@ sub callback sub download { my ($remove_headers,$str,$url,$filename) = @_; +# + my ($gz, $buffer); + + my $mode; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) + if ($filename =~ /\.xml$/) { # set retrieve mode + $mode = "normal"; + } elsif ($filename =~ /\.xml\.gz$/) { + $mode = "gzipped"; + } else { + die "Unsupported download extension, might be '.gz' or '.xml'\n"; + } +# $cont = ""; # modified by side-effect by the callback function - if (stat($filename)) { + if (stat($filename)) { # we already have local copy of requested file print "Using local copy for the $str file\n"; - open(FD, $filename); - while() { $cont .= $_; } - close(FD); - } else { - print "Downloading the $str file\n"; +# + if ($mode eq "gzipped") { # deflating cached file + print "deflating local file ...\n"; + $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + } elsif ($mode eq "normal") { # return cached file + open(FD, $filename); + while() { $cont .= $_; } + close(FD); + } else { # error + die "Internal error: unexpected mode: $mode, might be 'normal' or 'gzipped'"; + } +# + } else { # download file from net + print "Downloading the $str file\n"; # download file $ua = LWP::UserAgent->new; $request = HTTP::Request->new(GET => "$url"); $response = $ua->request($request, \&callback); @@ -251,6 +322,19 @@ sub download open(FD, ">".$filename); print FD $cont; close(FD); +# + if ($mode eq "gzipped") { # deflate gzipped retrieved file + print "deflating just retrieved file ...\n"; + $cont = ""; # reset $cont, cause $cont actually contain gzipped data + $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + # now $cont contain deflated, clear text data + } +# } if ($remove_headers) { $cont =~ s/<\?xml [^?]*\?>//sg; -- 2.39.2