X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.pl.in;h=fd72b75094014106160cd3a8ecae90176e8eb2ce;hb=9632260a013dabd108d9965d6faffd99246e1f47;hp=f0392de2caa9acd9cbafb1b4805ba6f71e28e1c3;hpb=d71338dee9f65981e827bca3b4d6d79c0197b4d9;p=helm.git diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index f0392de2c..fd72b7509 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -27,12 +27,26 @@ use Env; my $HELM_LIB_DIR = $ENV{"HELM_LIB_DIR"}; # this should be the only fixed constant -my $DEFAULT_HELM_LIB_DIR = "@DEFAULT_HELM_LIB_DIR@"; +my $DEFAULT_HELM_LIB_DIR = "@HELM_LIB_DIR@"; if (defined ($HELM_LIB_DIR)) { $HELM_LIB_PATH = $HELM_LIB_DIR."/configuration.pl"; } else { $HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl"; } + +# Let's override the configuration file +$style_dir = $ENV{"HELM_STYLE_DIR"} if (defined ($ENV{"HELM_STYLE_DIR"})); +$dtd_dir = $ENV{"HELM_DTD_DIR"} if (defined ($ENV{"HELM_DTD_DIR"})); + +# : TODO temporary, move this setting to configuration file +# set the cache mode, may be "gzipped" or "normal" +my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped'; +if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { + die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be". + "'normal' or 'gzipped'\n"; +} +# + # next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm require $HELM_LIB_PATH; @@ -48,11 +62,24 @@ use Compress::Zlib; #CSC: altra roba da sistemare segnata con CSC my $d = new HTTP::Daemon LocalPort => 8081; +my $myownurl = $d->url; + +# Let's patch the returned URL +$myownurl =~ s/\/$//; # chop the final slash +my $myownport = $myownurl; +$myownport =~ s/http:\/\/(.*):(.*)/$2/; +$myownurl =~ s/http:\/\/(.*):(.*)/$1/; +($myownurl) = gethostbyname($myownurl); +$myownurl = "http://".$myownurl.":".$myownport; + tie(%map, 'DB_File', $uris_dbm.".db", O_RDONLY, 0664); -print "Please contact me at: url, ">\n"; +print "Please contact me at: \n"; print "helm_dir: $helm_dir\n"; +print "style_dir: $style_dir\n"; print "dtd_dir: $dtd_dir\n"; print "urls_of_uris.db: $uris_dbm.db\n"; +print "cache mode: $cachemode\n"; + $SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes $SIG{USR1} = \&update; # sent by the child to make the parent update while (my $c = $d->accept) { @@ -73,21 +100,19 @@ while (my $c = $d->accept) { my $cicfilename = $cicuri; $cicfilename =~ s/cic:(.*)/$1/; $cicfilename =~ s/theory:(.*)/$1/; -# $cicfilename = $helm_dir.$cicfilename.".xml"; -# + my $cicurl = $map{$cicuri}; - my $extension; - if ($cicurl =~ /\.xml$/) { # non gzipped file - $extension = ".xml"; - } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file - $extension = ".xml.gz"; - } else { # error: unknown extension - die "unexpected extension in url: $cicurl, might be '.xml' or '.xml.gz'"; - } + my $extension; + if ($cicurl =~ /\.xml$/) { # non gzipped file + $extension = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $extension = ".xml.gz"; + } else { # error: unknown extension + die "unexpected extension in url: $cicurl, might be '.xml'". + "or '.xml.gz'"; + } $cicfilename = $helm_dir.$cicfilename.$extension; - #my $cicurl = $map{$cicuri}; -# if (!defined($cicurl)) { print "\nNOT FOUND!!!!!\n"; $c->send_error(RC_NOT_FOUND) @@ -127,45 +152,41 @@ while (my $c = $d->accept) { $cicfilename =~ s/theory:(.*)/$1/; $cicfilename = $helm_dir.$cicfilename; -# my $typesfilename = $cicfilename.".types.xml" if $typesuri; -# my $annfilename = $cicfilename.$annsuffix.".xml" if $annuri; -# $cicfilename .= ".xml"; - -# my $cicurl = $map{$cicuri}; my $typesurl = $map{$typesuri} if (defined($typesuri)); my $annurl = $map{$annuri} if (defined($annuri)); - my ($cicext, $typesext, $annext); - if ($cicurl =~ /\.xml$/) { # normal file - $cicext = ".xml"; - } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file - $cicext = ".xml.gz"; - } else { - die "unexpected extension in url: $cicurl; might be '.xml' or '.xml.gz'"; - } - if (defined($typesuri)) { # extension selection for types file - if ($typesurl =~ /\.xml$/) { # normal file - $typesext = ".types.xml"; - } elsif ($typesurl =~ /\.xml\.gz$/) { # gzipped file - $typesext = ".types.xml.gz"; - } else { - die "unexpected extension in url: $typesurl; might be '.xml' or '.xml.gz'"; - } - } - if (defined($annuri)) { # extension selection for annotation file - if ($annurl =~ /\.xml$/) { # normal file - $annext = ".xml"; - } elsif ($annurl =~ /\.xml\.gz$/) { # gzipped file - $annext = ".xml.gz"; - } else { - die "unexpected extension in url: $annurl might be '.xml' or '.xml.gz'"; - } - } + my ($cicext, $typesext, $annext); + if ($cicurl =~ /\.xml$/) { # normal file + $cicext = ".xml"; + } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + $cicext = ".xml.gz"; + } else { + die "unexpected extension in url: $cicurl;". + "might be '.xml' or '.xml.gz'"; + } + if (defined($typesuri)) { # extension selection for types file + if ($typesurl =~ /\.xml$/) { # normal file + $typesext = ".types.xml"; + } elsif ($typesurl =~ /\.xml\.gz$/) { # gzipped file + $typesext = ".types.xml.gz"; + } else { + die "unexpected extension in url: $typesurl;". + "might be '.xml' or '.xml.gz'"; + } + } + if (defined($annuri)) { # extension selection for annotation file + if ($annurl =~ /\.xml$/) { # normal file + $annext = ".xml"; + } elsif ($annurl =~ /\.xml\.gz$/) { # gzipped file + $annext = ".xml.gz"; + } else { + die "unexpected extension in url: $annurl". + "might be '.xml' or '.xml.gz'"; + } + } my $typesfilename = $cicfilename.$typesext if $typesuri; my $annfilename = $cicfilename.$annsuffix.$annext if $annuri; $cicfilename .= $cicext; -# - if (!defined($cicurl) || (!defined($typesurl) && $typesuri) || @@ -208,14 +229,54 @@ EOT print "DTD: $inputuri ==> ($filename)\n"; if (stat($filename)) { print "Using local copy\n"; - open(FD, $filename); + open(FD, $filename) or die "Cannot open $filename\n"; $cont = ""; - while() { $cont .= $_; } + while() { + # Vary bad heuristic for substituion of absolute URLs + # for relative ones + s/ENTITY (.*) SYSTEM\s+"/ENTITY $1 SYSTEM "$myownurl\/getdtd?uri=/g; + $cont .= $_; + } close(FD); answer($c,$cont); } else { die "Could not find DTD!"; } + } elsif ($http_method eq 'GET' and $http_path eq "/getstyleconf") { + my $filename = $inputuri; + $filename = $style_dir."/config/".$filename; + if (stat($filename)) { + open(FD, $filename) or die "Cannot open $filename\n"; + $cont = ""; + while() { + s/DOCTYPE (.*) SYSTEM\s+"/DOCTYPE $1 SYSTEM "$myownurl\/getstyleconf?uri=/g; + $cont .= $_; + } + close(FD); + answer($c,$cont); + } else { + die "Could not find Style Configuration File!"; + } + } elsif ($http_method eq 'GET' and $http_path eq "/getxslt") { + my $filename = $inputuri; + $filename = $style_dir."/".$filename; + print "XSLT: $inputuri ==> ($filename)\n"; + if (stat($filename)) { + print "Using local copy\n"; + open(FD, $filename) or die "Cannot open $filename\n"; + $cont = ""; + while() { + # Vary bad heuristic for substituion of absolute URLs + # for relative ones + s/xsl:import\s+href="/xsl:import href="$myownurl\/getxslt?uri=/g ; + s/xsl:include\s+href="/xsl:include href="$myownurl\/getxslt?uri=/g ; + $cont .= $_; + } + close(FD); + answer($c,$cont); + } else { + die "Could not find XSLT!"; + } } elsif ($http_method eq 'GET' and $http_path eq "/conf") { my $quoted_html_link = $html_link; $quoted_html_link =~ s/&/&/g; @@ -276,65 +337,112 @@ sub callback $cont .= $data; } +sub gunzip { # gunzip a file and return the deflated content + my ($filename) = @_; + + my ($gz, $buffer, $cont); + + print "deflating $filename ...\n"; + $gz = gzopen($filename, "r") + or die "Cannot open gzip'ed file $filename: $gzerrno"; + $cont = ""; + while ( $gz->gzread($buffer) > 0 ) { + $cont .= $buffer; + } + die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; + $gz->gzclose(); + + return $cont; +} + +sub gzip { # gzip the content argument and save it to filename argument + my ($cont, $filename) = @_; + + my ($gz, $cont); + + $gz = gzopen($filename, "w") + or die "Cannot gzopen for writing file $filename: $gzerrno"; + $gz->gzwrite($cont) or die "error writing: $gzerrno\n" ; + $gz->gzclose(); +} + sub download { my ($remove_headers,$str,$url,$filename) = @_; -# my ($gz, $buffer); - my $mode; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) + my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) if ($filename =~ /\.xml$/) { # set retrieve mode - $mode = "normal"; + $resourcetype = "normal"; } elsif ($filename =~ /\.xml\.gz$/) { - $mode = "gzipped"; + $resourcetype = "gzipped"; } else { die "Unsupported download extension, might be '.gz' or '.xml'\n"; } -# + my $basefname = $filename; + $basefname =~ s/\.gz$//; # get base resource name removing trailing .gz $cont = ""; # modified by side-effect by the callback function - if (stat($filename)) { # we already have local copy of requested file - print "Using local copy for the $str file\n"; -# - if ($mode eq "gzipped") { # deflating cached file - print "deflating local file ...\n"; - $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; - while ( $gz->gzread($buffer) > 0 ) { - $cont .= $buffer; - } - die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; - $gz->gzclose(); - } elsif ($mode eq "normal") { # return cached file - open(FD, $filename); - while() { $cont .= $_; } - close(FD); - } else { # error - die "Internal error: unexpected mode: $mode, might be 'normal' or 'gzipped'"; - } -# - } else { # download file from net - print "Downloading the $str file\n"; # download file + + my $localfname=""; + if (stat($basefname)) { + $localfname=$basefname; + } elsif (stat($basefname.".gz")) { + $localfname=$basefname.".gz"; + } + if ($localfname ne "") { # we already have local copy of requested file + # check both possible cache entry: gzipped or normal + print "Using local copy for the $str file\n"; + if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it + $cont = gunzip($localfname); + } elsif ($localfname =~ /\.xml$/) { # just return cached file + open(FD, $localfname) or die "Cannot open $localfname"; + while() { $cont .= $_; } + close(FD); + } else { # error + die "Internal error: unexpected file name $localfname," + ."must end with '.gz' or '.xml.gz'\n"; + } + } else { # download file from net + print "Downloading the $str file\n"; # download file $ua = LWP::UserAgent->new; $request = HTTP::Request->new(GET => "$url"); $response = $ua->request($request, \&callback); - print "Storing the $str file\n"; - mkdirs($filename); - open(FD, ">".$filename); - print FD $cont; - close(FD); -# - if ($mode eq "gzipped") { # deflate gzipped retrieved file - print "deflating just retrieved file ...\n"; - $cont = ""; # reset $cont, cause $cont actually contain gzipped data - $gz = gzopen($filename, "r") or die "Cannot open gzip'ed file $filename: $gzerrno"; - while ( $gz->gzread($buffer) > 0 ) { - $cont .= $buffer; - } - die "Error while reading : $gzerrno\n" if $gzerrno != Z_STREAM_END ; - $gz->gzclose(); - # now $cont contain deflated, clear text data - } -# + # cache retrieved file to disk +# TODO: inefficent, I haven't yet undestood how to deflate +# in memory gzipped file, without call "gzopen" + print "Storing the $str file\n"; + mkdirs($filename); + open(FD, ">".$filename.".tmp") or die "Cannot open $filename.tmp\n"; + print FD $cont; + close(FD); + + # handle cache conversion normal->gzipped or gzipped->normal as user choice + if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { + # cache the file as is + rename "$filename.tmp", $filename; + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'gzipped')) { + # cache the file as is + # and update the $cont variabile with deflated content + rename "$filename.tmp", $filename; + $cont = gunzip($filename); + } elsif (($cachemode eq 'normal') and ($resourcetype eq 'gzipped')) { + # deflate cache entry + # and update $cont + open(FD, "> $basefname") or die "cannot open $basefname\n"; + $cont = gunzip($filename.".tmp"); + print FD $cont; + close(FD); + unlink "$filename.tmp"; # delete old gzipped file + } elsif (($cachemode eq 'gzipped') and ($resourcetype eq 'normal')) { + # compress cache entry + gzip($cont, $basefname.".gz"); + unlink "$filename.tmp"; # delete old uncompressed file + } else { + die "Internal error, unsopported cachemode, resourcetype couple\n"; + } + # $cont now contained uncompressed data + } if ($remove_headers) { $cont =~ s/<\?xml [^?]*\?>//sg;