X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.pl.in;h=bee3315e7583cb9e1a98700559c36fc9eafb34b4;hb=4e8b137b58ec6763785b8ee490ee8cd6cb77b5bf;hp=fde70e168177b78a74ca178e0a03eec23fd08d17;hpb=d30b81ca62c2cb11b9ec418484f2d9c298f83f72;p=helm.git diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index fde70e168..bee3315e7 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -1,23 +1,47 @@ #!@PERL_BINARY@ +# Copyright (C) 2000, HELM Team. +# +# This file is part of HELM, an Hypertextual, Electronic +# Library of Mathematics, developed at the Computer Science +# Department, University of Bologna, Italy. +# +# HELM is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# HELM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HELM; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# For details, see the HELM World-Wide-Web page, +# http://cs.unibo.it/helm/. + # First of all, let's load HELM configuration use Env; -my $HELM_LIBRARY_DIR = $ENV{"HELM_LIBRARY_DIR"}; +my $HELM_LIB_DIR = $ENV{"HELM_LIB_DIR"}; # this should be the only fixed constant -my $DEFAULT_HELM_LIBRARY_DIR = "@DEFAULT_HELM_LIBRARY_DIR@"; -if (defined ($HELM_LIBRARY_DIR)) { - $HELM_LIBRARY_PATH = $HELM_LIBRARY_DIR."./configuration.pl"; +my $DEFAULT_HELM_LIB_DIR = "@HELM_LIB_DIR@"; +if (defined ($HELM_LIB_DIR)) { + $HELM_LIB_PATH = $HELM_LIB_DIR."/configuration.pl"; } else { - $HELM_LIBRARY_PATH = $DEFAULT_HELM_LIBRARY_DIR."./configuration.pl"; + $HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl"; } # next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm -require $HELM_LIBRARY_PATH; +require $HELM_LIB_PATH; use HTTP::Daemon; use HTTP::Status; use HTTP::Request; use LWP::UserAgent; use DB_File; +use Compress::Zlib; #CSC: mancano i controlli sulle condizioni di errore di molte funzioni #CSC: ==> non e' robusto @@ -49,9 +73,21 @@ while (my $c = $d->accept) { my $cicfilename = $cicuri; $cicfilename =~ s/cic:(.*)/$1/; $cicfilename =~ s/theory:(.*)/$1/; - $cicfilename = $helm_dir.$cicfilename.".xml"; - +# $cicfilename = $helm_dir.$cicfilename.".xml"; +# my $cicurl = $map{$cicuri}; + my $extension; + if ($cicurl =~ /\.xml$/) { # non gzipped file + $extension = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $extension = ".xml.gz"; + } else { # error: unknown extension + die "unexpected extension in url: $cicurl, might be '.xml' or '.xml.gz'"; + } + $cicfilename = $helm_dir.$cicfilename.$extension; + + #my $cicurl = $map{$cicuri}; +# if (!defined($cicurl)) { print "\nNOT FOUND!!!!!\n"; $c->send_error(RC_NOT_FOUND) @@ -91,13 +127,45 @@ while (my $c = $d->accept) { $cicfilename =~ s/theory:(.*)/$1/; $cicfilename = $helm_dir.$cicfilename; - my $typesfilename = $cicfilename.".types.xml" if $typesuri; - my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; - $cicfilename .= ".xml"; +# my $typesfilename = $cicfilename.".types.xml" if $typesuri; +# my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; +# $cicfilename .= ".xml"; +# my $cicurl = $map{$cicuri}; - my $typesurl = $map{$typesuri} if $typesuri; - my $annurl = $map{$annuri} if $annuri; + my $typesurl = $map{$typesuri} if (defined($typesuri)); + my $annurl = $map{$annuri} if (defined($annuri)); + my ($cicext, $typesext, $annext); + if ($cicurl =~ /\.xml$/) { # normal file + $cicext = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $cicext = ".xml.gz"; + } else { + die "unexpected extension in url: $cicurl; might be '.xml' or '.xml.gz'"; + } + if (defined($typesuri)) { # extension selection for types file + if ($typesurl =~ /\.xml$/) { # normal file + $typesext = ".types.xml"; + } elsif ($typesurl =~ /\.xml\.gz$/) { # gzipped file + $typesext = ".types.xml.gz"; + } else { + die "unexpected extension in url: $typesurl; might be '.xml' or '.xml.gz'"; + } + } + if (defined($annuri)) { # extension selection for annotation file + if ($annurl =~ /\.xml$/) { # normal file + $annext = ".xml"; + } elsif ($annurl =~ /\.xml\.gz$/) { # gzipped file + $annext = ".xml.gz"; + } else { + die "unexpected extension in url: $annurl might be '.xml' or '.xml.gz'"; + } + } + my $typesfilename = $cicfilename.$typesext if $typesuri; + my $annfilename = $cicfilename.$annsuffix.$annext if $annuri; + $cicfilename .= $cicext; +# + if (!defined($cicurl) || (!defined($typesurl) && $typesuri) || @@ -211,14 +279,40 @@ sub callback sub download { my ($remove_headers,$str,$url,$filename) = @_; +# + my ($gz, $buffer); + + my $mode; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) + if ($filename =~ /\.xml$/) { # set retrieve mode + $mode = "normal"; + } elsif ($filename =~ /\.xml\.gz$/) { + $mode = "gzipped"; + } else { + die "Unsupported download extension, might be '.gz' or '.xml'\n"; + } +# $cont = ""; # modified by side-effect by the callback function - if (stat($filename)) { + if (stat($filename)) { # we already have local copy of requested file print "Using local copy for the $str file\n"; - open(FD, $filename); - while() { $cont .= $_; } - close(FD); - } else { - print "Downloading the $str file\n"; +# + if ($mode eq "gzipped") { # deflating cached file + print "deflating local file ...\n"; + $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + } elsif ($mode eq "normal") { # return cached file + open(FD, $filename); + while() { $cont .= $_; } + close(FD); + } else { # error + die "Internal error: unexpected mode: $mode, might be 'normal' or 'gzipped'"; + } +# + } else { # download file from net + print "Downloading the $str file\n"; # download file $ua = LWP::UserAgent->new; $request = HTTP::Request->new(GET => "$url"); $response = $ua->request($request, \&callback); @@ -228,6 +322,19 @@ sub download open(FD, ">".$filename); print FD $cont; close(FD); +# + if ($mode eq "gzipped") { # deflate gzipped retrieved file + print "deflating just retrieved file ...\n"; + $cont = ""; # reset $cont, cause $cont actually contain gzipped data + $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + # now $cont contain deflated, clear text data + } +# } if ($remove_headers) { $cont =~ s/<\?xml [^?]*\?>//sg;