X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.pl.in;h=addd81dccc1a2213455c10188072635257dd5985;hb=6d71aa3ee23468b86bcad8fb640d71e2692bd901;hp=206ba6b4ef911ed7c489eaedbed04103735a217f;hpb=c9e626cba3af69354dac69bfaefeb346a191fc5a;p=helm.git diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index 206ba6b4e..addd81dcc 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -1,23 +1,60 @@ #!@PERL_BINARY@ +# Copyright (C) 2000, HELM Team. +# +# This file is part of HELM, an Hypertextual, Electronic +# Library of Mathematics, developed at the Computer Science +# Department, University of Bologna, Italy. +# +# HELM is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# HELM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with HELM; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# For details, see the HELM World-Wide-Web page, +# http://cs.unibo.it/helm/. 
+ # First of all, let's load HELM configuration use Env; -my $HELM_LIBRARY_DIR = $ENV{"HELM_LIBRARY_DIR"}; +my $HELM_LIB_DIR = $ENV{"HELM_LIB_DIR"}; # this should be the only fixed constant -my $DEFAULT_HELM_LIBRARY_DIR = "@DEFAULT_HELM_LIBRARY_DIR@"; -if (defined ($HELM_LIBRARY_DIR) { - $HELM_LIBRARY_PATH = $HELM_LIBRARY_DIR."./configuration.pl"; +my $DEFAULT_HELM_LIB_DIR = "@HELM_LIB_DIR@"; +if (defined ($HELM_LIB_DIR)) { + $HELM_LIB_PATH = $HELM_LIB_DIR."/configuration.pl"; } else { - $HELM_LIBRARY_PATH = $DEFAULT_HELM_LIBRARY_DIR."./configuration.pl"; + $HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl"; } + +# Let's override the configuration file +$styles_dir = $ENV{"HELM_STYLE_DIR"} if (defined ($ENV{"HELM_STYLE_DIR"})); +$dtd_dir = $ENV{"HELM_DTD_DIR"} if (defined ($ENV{"HELM_DTD_DIR"})); + +# : TODO temporary, move this setting to configuration file +# set the cache mode, may be "gzipped" or "normal" +my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped'; +if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { + die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be 'normal' or 'gzipped'\n"; +} +# + # next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm -require $HELM_LIBRARY_PATH; +require $HELM_LIB_PATH; use HTTP::Daemon; use HTTP::Status; use HTTP::Request; use LWP::UserAgent; use DB_File; +use Compress::Zlib; #CSC: mancano i controlli sulle condizioni di errore di molte funzioni #CSC: ==> non e' robusto @@ -29,6 +66,8 @@ print "Please contact me at: url, ">\n"; print "helm_dir: $helm_dir\n"; print "dtd_dir: $dtd_dir\n"; print "urls_of_uris.db: $uris_dbm.db\n"; +print "cache mode: $cachemode\n"; + $SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes $SIG{USR1} = \&update; # sent by the child to make the parent update while (my $c = $d->accept) { @@ -49,9 +88,21 @@ while (my $c = $d->accept) { my $cicfilename = $cicuri; $cicfilename =~ s/cic:(.*)/$1/; $cicfilename =~ s/theory:(.*)/$1/; - $cicfilename = 
$helm_dir.$cicfilename.".xml"; - +# $cicfilename = $helm_dir.$cicfilename.".xml"; +# my $cicurl = $map{$cicuri}; + my $extension; + if ($cicurl =~ /\.xml$/) { # non gzipped file + $extension = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $extension = ".xml.gz"; + } else { # error: unknown extension + die "unexpected extension in url: $cicurl, might be '.xml' or '.xml.gz'"; + } + $cicfilename = $helm_dir.$cicfilename.$extension; + + #my $cicurl = $map{$cicuri}; +# if (!defined($cicurl)) { print "\nNOT FOUND!!!!!\n"; $c->send_error(RC_NOT_FOUND) @@ -91,13 +142,45 @@ while (my $c = $d->accept) { $cicfilename =~ s/theory:(.*)/$1/; $cicfilename = $helm_dir.$cicfilename; - my $typesfilename = $cicfilename.".types.xml" if $typesuri; - my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; - $cicfilename .= ".xml"; +# my $typesfilename = $cicfilename.".types.xml" if $typesuri; +# my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; +# $cicfilename .= ".xml"; +# my $cicurl = $map{$cicuri}; - my $typesurl = $map{$typesuri} if $typesuri; - my $annurl = $map{$annuri} if $annuri; + my $typesurl = $map{$typesuri} if (defined($typesuri)); + my $annurl = $map{$annuri} if (defined($annuri)); + my ($cicext, $typesext, $annext); + if ($cicurl =~ /\.xml$/) { # normal file + $cicext = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $cicext = ".xml.gz"; + } else { + die "unexpected extension in url: $cicurl; might be '.xml' or '.xml.gz'"; + } + if (defined($typesuri)) { # extension selection for types file + if ($typesurl =~ /\.xml$/) { # normal file + $typesext = ".types.xml"; + } elsif ($typesurl =~ /\.xml\.gz$/) { # gzipped file + $typesext = ".types.xml.gz"; + } else { + die "unexpected extension in url: $typesurl; might be '.xml' or '.xml.gz'"; + } + } + if (defined($annuri)) { # extension selection for annotation file + if ($annurl =~ /\.xml$/) { # normal file + $annext = ".xml"; + } elsif ($annurl =~ /\.xml\.gz$/) { # gzipped file + 
$annext = ".xml.gz"; + } else { + die "unexpected extension in url: $annurl might be '.xml' or '.xml.gz'"; + } + } + my $typesfilename = $cicfilename.$typesext if $typesuri; + my $annfilename = $cicfilename.$annsuffix.$annext if $annuri; + $cicfilename .= $cicext; +# + if (!defined($cicurl) || (!defined($typesurl) && $typesuri) || @@ -140,7 +223,7 @@ EOT print "DTD: $inputuri ==> ($filename)\n"; if (stat($filename)) { print "Using local copy\n"; - open(FD, $filename); + open(FD, $filename) or die "Cannot open $filename\n"; $cont = ""; while() { $cont .= $_; } close(FD); @@ -148,6 +231,20 @@ EOT } else { die "Could not find DTD!"; } + } elsif ($http_method eq 'GET' and $http_path eq "/getxslt") { + my $filename = $inputuri; + $filename = $styles_dir."/".$filename; + print "XSLT: $inputuri ==> ($filename)\n"; + if (stat($filename)) { + print "Using local copy\n"; + open(FD, $filename) or die "Cannot open $filename\n"; + $cont = ""; + while() { $cont .= $_; } + close(FD); + answer($c,$cont); + } else { + die "Could not find XSLT!"; + } } elsif ($http_method eq 'GET' and $http_path eq "/conf") { my $quoted_html_link = $html_link; $quoted_html_link =~ s/&/&/g; @@ -208,26 +305,111 @@ sub callback $cont .= $data; } +sub gunzip { # inflate the gzip file $filename and return its uncompressed content as a string; dies if the file cannot be opened or the stream does not end with Z_STREAM_END + my ($filename) = @_; + + my ($gz, $buffer, $cont); + + print "deflating $filename ...\n"; + $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; + $cont = ""; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + + return $cont; +} + +sub gzip { # gzip-compress the string $cont and write it to the file $filename; dies if the file cannot be opened or written + my ($cont, $filename) = @_; + + my $gz; # only $gz is declared here: a second "my" on $cont would mask the argument with undef + + $gz = gzopen($filename, "w") or die "Cannot gzopen for writing file $filename: $gzerrno"; + $gz->gzwrite($cont) or die "error writing: $gzerrno\n" ; + $gz->gzclose(); +} + sub download { my 
($remove_headers,$str,$url,$filename) = @_; +# + my ($gz, $buffer); + + my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) + if ($filename =~ /\.xml$/) { # set retrieve mode + $resourcetype = "normal"; + } elsif ($filename =~ /\.xml\.gz$/) { + $resourcetype = "gzipped"; + } else { + die "Unsupported download extension, might be '.gz' or '.xml'\n"; + } + my $basefname = $filename; + $basefname =~ s/\.gz$//; # get base resource name removing trailing .gz +# $cont = ""; # modified by side-effect by the callback function - if (stat($filename)) { + + my $localfname=""; + if (stat($basefname)) { + $localfname=$basefname; + } elsif (stat($basefname.".gz")) { + $localfname=$basefname.".gz"; + } + if ($localfname ne "") { # we already have local copy of requested file + # check both possible cache entry: gzipped or normal print "Using local copy for the $str file\n"; - open(FD, $filename); - while() { $cont .= $_; } - close(FD); - } else { - print "Downloading the $str file\n"; +# + if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it + $cont = gunzip($localfname); + } elsif ($localfname =~ /\.xml$/) { # just return cached file + open(FD, $localfname) or die "Cannot open $localfname"; + while() { $cont .= $_; } + close(FD); + } else { # error + die "Internal error: unexpected file name $localfname, must end with '.gz' or '.xml.gz'\n"; + } +# + } else { # download file from net + print "Downloading the $str file\n"; # download file $ua = LWP::UserAgent->new; $request = HTTP::Request->new(GET => "$url"); $response = $ua->request($request, \&callback); - print "Storing the $str file\n"; - mkdirs($filename); - open(FD, ">".$filename); - print FD $cont; - close(FD); + # cache retrieved file to disk +# TODO: inefficent, I haven't yet undestood how to deflate in memory gzipped file, +# without call "gzopen" +# + print "Storing the $str file\n"; + mkdirs($filename); + open(FD, ">".$filename.".tmp") or die "Cannot open $filename.tmp\n"; + 
print FD $cont; + close(FD); + + # handle cache conversion normal->gzipped or gzipped->normal as user choice + if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { # cache the file as is + rename "$filename.tmp", $filename; + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'gzipped')) { # cache the file as is + # and update the $cont variabile with deflated content + rename "$filename.tmp", $filename; + $cont = gunzip($filename); + } elsif (($cachemode eq 'normal') and ($resourcetype eq 'gzipped')) { # deflate cache entry + # and update $cont + open(FD, "> $basefname") or die "cannot open $basefname\n"; + $cont = gunzip($filename.".tmp"); + print FD $cont; + close(FD); + unlink "$filename.tmp"; # delete old gzipped file + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'normal')) { # compress cache entry + gzip($cont, $basefname.".gz"); + unlink "$filename.tmp"; # delete old uncompressed file + } else { + die "Internal error, unsopported cachemode, resourcetype couple\n"; + } + # $cont now contained uncompressed data + +# } if ($remove_headers) { $cont =~ s/<\?xml [^?]*\?>//sg;