X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.pl.in;h=89f496ac7bc201e6cadc50bb09812b347eace3cb;hb=083c36a4acd76127530b9af3d4c6c311f447d79b;hp=e8adb3bc02f321d0c3ca1adc05b4f894749c0758;hpb=8e1a7e55cbc7750446f0a7ab3d071190594243fb;p=helm.git diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index e8adb3bc0..89f496ac7 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -23,35 +23,65 @@ # For details, see the HELM World-Wide-Web page, # http://cs.unibo.it/helm/. +my $VERSION = "@VERSION@"; + # First of all, let's load HELM configuration use Env; -my $HELM_LIBRARY_DIR = $ENV{"HELM_LIBRARY_DIR"}; +my $HELM_LIB_DIR = $ENV{"HELM_LIB_DIR"}; # this should be the only fixed constant -my $DEFAULT_HELM_LIBRARY_DIR = "@DEFAULT_HELM_LIBRARY_DIR@"; -if (defined ($HELM_LIBRARY_DIR)) { - $HELM_LIBRARY_PATH = $HELM_LIBRARY_DIR."/configuration.pl"; +my $DEFAULT_HELM_LIB_DIR = "@HELM_LIB_DIR@"; +if (defined ($HELM_LIB_DIR)) { + $HELM_LIB_PATH = $HELM_LIB_DIR."/configuration.pl"; } else { - $HELM_LIBRARY_PATH = $DEFAULT_HELM_LIBRARY_DIR."/configuration.pl"; + $HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl"; +} + +# Let's override the configuration file +$style_dir = $ENV{"HELM_STYLE_DIR"} if (defined ($ENV{"HELM_STYLE_DIR"})); +$dtd_dir = $ENV{"HELM_DTD_DIR"} if (defined ($ENV{"HELM_DTD_DIR"})); + +# : TODO temporary, move this setting to configuration file +# set the cache mode, may be "gzipped" or "normal" +my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped'; +if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { + die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be". + "'normal' or 'gzipped'\n"; } +# + # next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm -require $HELM_LIBRARY_PATH; +require $HELM_LIB_PATH; use HTTP::Daemon; use HTTP::Status; use HTTP::Request; use LWP::UserAgent; use DB_File; +use Compress::Zlib; #CSC: mancano i controlli sulle condizioni di errore di molte funzioni #CSC: ==> non e' robusto #CSC: altra roba da sistemare segnata con CSC my $d = new HTTP::Daemon LocalPort => 8081; +my $myownurl = $d->url; + +# Let's patch the returned URL +$myownurl =~ s/\/$//; # chop the final slash +my $myownport = $myownurl; +$myownport =~ s/http:\/\/(.*):(.*)/$2/; +$myownurl =~ s/http:\/\/(.*):(.*)/$1/; +($myownurl) = gethostbyname($myownurl); +$myownurl = "http://".$myownurl.":".$myownport; + tie(%map, 'DB_File', $uris_dbm.".db", O_RDONLY, 0664); -print "Please contact me at: url, ">\n"; +print "Please contact me at: \n"; print "helm_dir: $helm_dir\n"; +print "style_dir: $style_dir\n"; print "dtd_dir: $dtd_dir\n"; print "urls_of_uris.db: $uris_dbm.db\n"; +print "cache mode: $cachemode\n"; + $SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes $SIG{USR1} = \&update; # sent by the child to make the parent update while (my $c = $d->accept) { @@ -72,9 +102,19 @@ while (my $c = $d->accept) { my $cicfilename = $cicuri; $cicfilename =~ s/cic:(.*)/$1/; $cicfilename =~ s/theory:(.*)/$1/; - $cicfilename = $helm_dir.$cicfilename.".xml"; my $cicurl = $map{$cicuri}; + my $extension; + if ($cicurl =~ /\.xml$/) { # non gzipped file + $extension = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $extension = ".xml.gz"; + } else { # error: unknown extension + die "unexpected extension in url: $cicurl, might be '.xml'". + "or '.xml.gz'"; + } + $cicfilename = $helm_dir.$cicfilename.$extension; + if (!defined($cicurl)) { print "\nNOT FOUND!!!!!\n"; $c->send_error(RC_NOT_FOUND) @@ -114,13 +154,41 @@ while (my $c = $d->accept) { $cicfilename =~ s/theory:(.*)/$1/; $cicfilename = $helm_dir.$cicfilename; - my $typesfilename = $cicfilename.".types.xml" if $typesuri; - my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; - $cicfilename .= ".xml"; - my $cicurl = $map{$cicuri}; - my $typesurl = $map{$typesuri} if $typesuri; - my $annurl = $map{$annuri} if $annuri; + my $typesurl = $map{$typesuri} if (defined($typesuri)); + my $annurl = $map{$annuri} if (defined($annuri)); + my ($cicext, $typesext, $annext); + if ($cicurl =~ /\.xml$/) { # normal file + $cicext = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $cicext = ".xml.gz"; + } else { + die "unexpected extension in url: $cicurl;". + "might be '.xml' or '.xml.gz'"; + } + if (defined($typesuri)) { # extension selection for types file + if ($typesurl =~ /\.xml$/) { # normal file + $typesext = ".types.xml"; + } elsif ($typesurl =~ /\.xml\.gz$/) { # gzipped file + $typesext = ".types.xml.gz"; + } else { + die "unexpected extension in url: $typesurl;". + "might be '.xml' or '.xml.gz'"; + } + } + if (defined($annuri)) { # extension selection for annotation file + if ($annurl =~ /\.xml$/) { # normal file + $annext = ".xml"; + } elsif ($annurl =~ /\.xml\.gz$/) { # gzipped file + $annext = ".xml.gz"; + } else { + die "unexpected extension in url: $annurl". + "might be '.xml' or '.xml.gz'"; + } + } + my $typesfilename = $cicfilename.$typesext if $typesuri; + my $annfilename = $cicfilename.$annsuffix.$annext if $annuri; + $cicfilename .= $cicext; if (!defined($cicurl) || (!defined($typesurl) && $typesuri) || @@ -163,14 +231,54 @@ EOT print "DTD: $inputuri ==> ($filename)\n"; if (stat($filename)) { print "Using local copy\n"; - open(FD, $filename); + open(FD, $filename) or die "Cannot open $filename\n"; $cont = ""; - while() { $cont .= $_; } + while() { + # Vary bad heuristic for substituion of absolute URLs + # for relative ones + s/ENTITY (.*) SYSTEM\s+"/ENTITY $1 SYSTEM "$myownurl\/getdtd?uri=/g; + $cont .= $_; + } close(FD); answer($c,$cont); } else { die "Could not find DTD!"; } + } elsif ($http_method eq 'GET' and $http_path eq "/getstyleconf") { + my $filename = $inputuri; + $filename = $style_dir."/config/".$filename; + if (stat($filename)) { + open(FD, $filename) or die "Cannot open $filename\n"; + $cont = ""; + while() { + s/DOCTYPE (.*) SYSTEM\s+"/DOCTYPE $1 SYSTEM "$myownurl\/getstyleconf?uri=/g; + $cont .= $_; + } + close(FD); + answer($c,$cont); + } else { + die "Could not find Style Configuration File!"; + } + } elsif ($http_method eq 'GET' and $http_path eq "/getxslt") { + my $filename = $inputuri; + $filename = $style_dir."/".$filename; + print "XSLT: $inputuri ==> ($filename)\n"; + if (stat($filename)) { + print "Using local copy\n"; + open(FD, $filename) or die "Cannot open $filename\n"; + $cont = ""; + while() { + # Vary bad heuristic for substituion of absolute URLs + # for relative ones + s/xsl:import\s+href="/xsl:import href="$myownurl\/getxslt?uri=/g ; + s/xsl:include\s+href="/xsl:include href="$myownurl\/getxslt?uri=/g ; + $cont .= $_; + } + close(FD); + answer($c,$cont); + } else { + die "Could not find XSLT!"; + } } elsif ($http_method eq 'GET' and $http_path eq "/conf") { my $quoted_html_link = $html_link; $quoted_html_link =~ s/&/&/g; @@ -187,6 +295,9 @@ EOT kill(USR1,getppid()); print " done\n"; answer($c,"

Update done

"); + } elsif ($http_method eq 'GET' and $http_path eq "/version") { + print "Version requested!"; + answer($c,"

HTTP Getter Version ".$VERSION."

"); } else { print "\nINVALID REQUEST!!!!!\n"; $c->send_error(RC_FORBIDDEN) @@ -231,26 +342,112 @@ sub callback $cont .= $data; } +sub gunzip { # gunzip a file and return the deflated content + my ($filename) = @_; + + my ($gz, $buffer, $cont); + + print "deflating $filename ...\n"; + $gz = gzopen($filename, "r") + or die "Cannot open gzip'ed file $filename: $gzerrno"; + $cont = ""; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + + return $cont; +} + +sub gzip { # gzip the content argument and save it to filename argument + my ($cont, $filename) = @_; + + my ($gz, $cont); + + $gz = gzopen($filename, "w") + or die "Cannot gzopen for writing file $filename: $gzerrno"; + $gz->gzwrite($cont) or die "error writing: $gzerrno\n" ; + $gz->gzclose(); +} + sub download { my ($remove_headers,$str,$url,$filename) = @_; - $cont = ""; # modified by side-effect by the callback function - if (stat($filename)) { - print "Using local copy for the $str file\n"; - open(FD, $filename); - while() { $cont .= $_; } - close(FD); + my ($gz, $buffer); + + my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) + if ($filename =~ /\.xml$/) { # set retrieve mode + $resourcetype = "normal"; + } elsif ($filename =~ /\.xml\.gz$/) { + $resourcetype = "gzipped"; } else { - print "Downloading the $str file\n"; + die "Unsupported download extension, might be '.gz' or '.xml'\n"; + } + my $basefname = $filename; + $basefname =~ s/\.gz$//; # get base resource name removing trailing .gz + $cont = ""; # modified by side-effect by the callback function + + my $localfname=""; + if (stat($basefname)) { + $localfname=$basefname; + } elsif (stat($basefname.".gz")) { + $localfname=$basefname.".gz"; + } + if ($localfname ne "") { # we already have local copy of requested file + # check both possible cache entry: gzipped or normal + print "Using local copy for the $str file\n"; + if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it + $cont = gunzip($localfname); + } elsif ($localfname =~ /\.xml$/) { # just return cached file + open(FD, $localfname) or die "Cannot open $localfname"; + while() { $cont .= $_; } + close(FD); + } else { # error + die "Internal error: unexpected file name $localfname," + ."must end with '.gz' or '.xml.gz'\n"; + } + } else { # download file from net + print "Downloading the $str file\n"; # download file $ua = LWP::UserAgent->new; $request = HTTP::Request->new(GET => "$url"); $response = $ua->request($request, \&callback); - print "Storing the $str file\n"; - mkdirs($filename); - open(FD, ">".$filename); - print FD $cont; - close(FD); + # cache retrieved file to disk +# TODO: inefficent, I haven't yet undestood how to deflate +# in memory gzipped file, without call "gzopen" + print "Storing the $str file\n"; + mkdirs($filename); + open(FD, ">".$filename.".tmp") or die "Cannot open $filename.tmp\n"; + print FD $cont; + close(FD); + + # handle cache conversion normal->gzipped or gzipped->normal as user choice + if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { + # cache the file as is + rename "$filename.tmp", $filename; + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'gzipped')) { + # cache the file as is + # and update the $cont variabile with deflated content + rename "$filename.tmp", $filename; + $cont = gunzip($filename); + } elsif (($cachemode eq 'normal') and ($resourcetype eq 'gzipped')) { + # deflate cache entry + # and update $cont + open(FD, "> $basefname") or die "cannot open $basefname\n"; + $cont = gunzip($filename.".tmp"); + print FD $cont; + close(FD); + unlink "$filename.tmp"; # delete old gzipped file + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'normal')) { + # compress cache entry + gzip($cont, $basefname.".gz"); + unlink "$filename.tmp"; # delete old uncompressed file + } else { + die "Internal error, unsopported cachemode, resourcetype couple\n"; + } + # $cont now contained uncompressed data + } if ($remove_headers) { $cont =~ s/<\?xml [^?]*\?>//sg;