From 2743bf654edf44411fb0c0a64bbe485c6bc5c864 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 26 Jun 2001 14:48:51 +0000 Subject: [PATCH] * added preliminary support for rdf metadata - "getxml" method return also rdf metadata - "update" method update both urls_of_uris and rdf db - "resolve" method resolve both normal and rdf uris * added sub isRdfUri * added sub resolve * changed VERSION var in configure.in, now in sync with cvs repository version .. :-) --- helm/http_getter/configure.in | 4 +- helm/http_getter/http_getter.pl.in | 172 ++++++++++++++++++++++------- 2 files changed, 134 insertions(+), 42 deletions(-) diff --git a/helm/http_getter/configure.in b/helm/http_getter/configure.in index 13ba5aee7..a678fb2d0 100644 --- a/helm/http_getter/configure.in +++ b/helm/http_getter/configure.in @@ -2,8 +2,8 @@ AC_INIT(http_getter.pl.in) PACKAGE=helm_http_getter MAJOR_VERSION=0 -MINOR_VERSION=0 -MICRO_VERSION=2 +MINOR_VERSION=1 +MICRO_VERSION=45 VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION DEFAULT_HELM_LIB_DIR=/usr/local/lib/helm diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index 7017d1957..1daadef7b 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -30,7 +30,7 @@ my $VERSION = "@VERSION@"; # various variables my ($HELM_LIB_PATH); my $cgi_dir = "@HELM_CGI_DIR@"; -my (%map); +my (%map, %rdf_map); # First of all, let's load HELM configuration use Env; @@ -52,9 +52,16 @@ if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { } # -# next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm +# next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm, $indexname require $HELM_LIB_PATH; +# TEMP: TODO put these vars in configuration file configuration.xml +# +$helm_rdf_dir = "/usr/local/helm/rdf"; +$rdf_dbm = "/usr/local/helm/rdf_urls"; +$rdf_indexname = "rdf_index.txt"; +# + # Let's override the configuration file $style_dir = $ENV{"HELM_STYLE_DIR"} if (defined ($ENV{"HELM_STYLE_DIR"})); $dtd_dir = $ENV{"HELM_DTD_DIR"} if (defined ($ENV{"HELM_DTD_DIR"})); @@ -83,13 +90,22 @@ $myownurl =~ s/http:\/\/(.*):(.*)/$1/; ($myownurl) = gethostbyname($myownurl); $myownurl = "http://".$myownurl.":".$myownport; -tie(%map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); +tie(%map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); # open dbs +tie(%rdf_map, 'DB_File', $rdf_dbm.".db", O_RDWR, 0664); + +print "\n"; +print "HTTP Getter $VERSION\n"; # print hello information print "Please contact me at: \n"; +print "\n"; print "helm_dir: $helm_dir\n"; +print "helm_rdf_dir: $helm_rdf_dir\n"; print "style_dir: $style_dir\n"; print "dtd_dir: $dtd_dir\n"; print "urls_of_uris.db: $uris_dbm.db\n"; print "cache mode: $cachemode\n"; +print "indexname: $indexname\n"; +print "rdf_indexname: $rdf_indexname\n"; +print "\n"; $SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes $SIG{USR1} = \&update; # sent by the child to make the parent update @@ -107,9 +123,10 @@ while (my $c = $d->accept) { print "\nUnescaped query: ".$http_query."\n"; + # "getxml" works with rdf uris if ($http_method eq 'GET' and $http_path eq "/getxml") { # finds the uri, url and filename - my $cicuri = $inputuri; + my $answerformat = $cgi->param('format'); my $patch_dtd = $cgi->param('patch_dtd'); $answerformat = "" if (not defined($answerformat)); @@ -124,41 +141,53 @@ while (my $c = $d->accept) { die "Wrong param, patch_dtd must be 'yes' or 'no'\n"; } - my $cicfilename = $cicuri; - $cicfilename =~ s/cic:(.*)/$1/; - $cicfilename =~ s/theory:(.*)/$1/; + my $filename = $inputuri; + if (not isRdfUri($inputuri)) { # standad cic: or theory: uri + $filename =~ s/^cic:(.*)/$1/; + $filename =~ s/^theory:(.*)/$1/; + } else { # rdf uri + $filename =~ s/^(.*)\/\/cic:(.*)/$2/; + $filename =~ s/^(.*)\/\/theory:(.*)/$2/; + } - my $cicurl = $map{$cicuri}; - if (not defined($cicurl)) { - die "uri \"$cicuri\" can't be resolved\n"; + my $url = resolve ($inputuri); # resolve uri in url + if (not defined($url)) { + die "uri \"$inputuri\" can't be resolved\n"; } - my $extension; - if ($cicurl =~ /\.xml$/) { # non gzipped file + + my $extension; # file extension + if ($url =~ /\.xml$/) { # non gzipped file $extension = ".xml"; - } elsif ($cicurl =~ /\.xml\.gz$/) { # gzipped file + } elsif ($url =~ /\.xml\.gz$/) { # gzipped file $extension = ".xml.gz"; } else { # error: unknown extension - die "unexpected extension in url: $cicurl, might be '.xml'". + die "unexpected extension in url: $url, might be '.xml'". "or '.xml.gz'"; } - $cicfilename = $helm_dir.$cicfilename.$extension; - if (!defined($cicurl)) { + if (not isRdfUri ($inputuri)) { + $filename = $helm_dir.$filename.$extension; + } else { + $filename = $helm_rdf_dir.$filename.$extension; + } + + if (!defined($url)) { print "\nNOT FOUND!!!!!\n"; $c->send_error(RC_NOT_FOUND) } else { - print_request("cic",$cicuri,$cicurl,$cicfilename); + print_request("cic",$inputuri,$url,$filename); # Retrieves the file - my $ciccontent = download($patch_dtd,"cic",$cicurl,$cicfilename,$answerformat); + my $ciccontent = download($patch_dtd,"cic",$url,$filename,$answerformat); # Answering the client - if ($answerformat eq "normal") { + if ($answerformat eq "normal") { answer($c,$ciccontent,"text/xml",""); } else { answer($c,$ciccontent,"text/xml","x-gzip"); } } + # "/register" does not work with rdf uris } elsif ($http_method eq 'GET' and $http_path eq "/register") { my $inputurl = $cgi->param('url'); print "Register requested...\n"; @@ -177,8 +206,10 @@ while (my $c = $d->accept) { untie %map; print "done\n"; html_nice_answer($c,"Register done"); + # "/resolve" works with rdf uri } elsif ($http_method eq 'GET' and $http_path eq "/resolve") { - my $outputurl = $map{$inputuri}; + #my $outputurl = $map{$inputuri}; + my $outputurl = resolve($inputuri); $outputurl = "" if (not defined($outputurl)); $cont = "\n\n"; if ($outputurl eq "") { @@ -242,12 +273,13 @@ while (my $c = $d->accept) { die "Could not find XSLT!"; } } elsif ($http_method eq 'GET' and $http_path eq "/update") { - # rebuild urls_of_uris.db + # rebuild urls_of_uris db and rdf uris db print "Update requested...\n"; - mk_urls_of_uris(); + update_dbs(); kill(USR1,getppid()); # signal changes to parent print " done\n"; html_nice_answer($c,"Update done"); + # "/ls" does not work with rdf uris } elsif ($http_method eq 'GET' and $http_path eq "/ls") { # send back keys that begin with a given uri my ($uritype,$uripattern,$uriprefix); @@ -286,9 +318,11 @@ while (my $c = $d->accept) { "'cic:' or 'theory:'"); } } elsif ($http_method eq 'GET' and $http_path eq "/help") { + # help request print "Help requested!\n"; html_nice_answer($c,"HTTP Getter Version: $VERSION"); } elsif ($http_method eq 'GET' and $http_path =~ /\.cgi$/) { + # cgi handling print "CGI requested!\n"; if ($http_path !~ /^\/[^\/]*\.cgi$/) { html_nice_answer($c,"Invalid CGI name: $http_path, ". @@ -309,7 +343,7 @@ while (my $c = $d->accept) { html_nice_answer($c,"CGI '$http_path' not found ". "in CGI dir '$cgi_dir'"); } - } else { + } else { # unsupported request print "\n"; print "INVALID REQUEST!!!!!\n"; print "(PATH: ",$http_path,", "; @@ -327,6 +361,31 @@ while (my $c = $d->accept) { #================================ +sub isRdfUri { # return true if the uri is an rdf uri, false otherwise +# typycal rdf uri: +# helm:rdf/cic:www.cs.unibo.it/helm/rdf/foo_schema//cic:\ +# /Coq/Init/Logic/True_rec.con.types.xml.gz +# +# the format is "helm:rdf/://" +# + my ($uri) = @_; + if ($uri =~ /^helm:rdf\/(.*):?(.*)\/\/(.*)/) { + return 1; + } else { + return 0; + } +} + +sub resolve { # resolve an uri in a url, work both with standard cic: or theory: + # uris and rdf uris + my ($uri) = @_; + if (isRdfUri ($uri)) { # rdf uri, resolve using rdf db + return ($rdf_map{$uri}); + } else { # standard cic: or theory: uri, resolve using std uri db + return ($map{$uri}); + } +} + sub finduris { # find uris for cic and theory trees generation my ($uritype,$uripattern,$format) = @_; my $content = ""; @@ -656,35 +715,45 @@ sub helm_wget { sub update { untie %map; tie(%map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); + untie %rdf_map; + tie(%rdf_map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); } -sub mk_urls_of_uris { -#rebuild $uris_dbm.db fetching resource indexes from servers +sub update_dbs { +# rebuild dbs fetching resource indexes from servers. +# Rebuild urls_of_uris db (1) and rdf db (2) my ( - $server, $idxfile, $uri, $url, $comp, $line, + $server, $idxfile, $rdf_idxfile, $uri, $url, $comp, $line, @servers, - %urls_of_uris + %urls_of_uris, %rdf # local proxy of $map and $rdf_map ); untie %map; - if (stat $uris_dbm.".db") { # remove old db file - unlink($uris_dbm.".db") or - die "cannot unlink old db file: $uris_dbm.db\n"; + untie %rdf_map; + foreach my $file ($uris_dbm, $rdf_dbm) { # remove old db file + if (stat $file.".db") { # remove if exists + unlink ($file.".db") or die "can't unlink old db file: $file.db\n"; + } } tie(%urls_of_uris, 'DB_File', $uris_dbm.".db", O_RDWR|O_CREAT, 0664); + tie(%rdf, 'DB_File', $rdf_dbm.".db", O_RDWR|O_CREAT, 0664); open (SRVS, "< $servers_file") or die "cannot open servers file: $servers_file\n"; - @servers = ; + @servers = ; # read all servers close (SRVS); - while ($server = pop @servers) { #cicle on servers in reverse order + + while ($server = pop @servers) { # cicle on servers in _reverse_ order print "processing server: $server ...\n"; chomp $server; - helm_wget($tmp_dir, $server."/".$indexname); #get index + helm_wget($tmp_dir, $server."/".$indexname); # get index + helm_wget($tmp_dir, $server."/".$rdf_indexname); # get rdf index $idxfile = $tmp_dir."/".$indexname; - open (INDEX, "< $idxfile") or + $rdf_idxfile = $tmp_dir."/".$rdf_indexname; + + open (INDEX, "< $idxfile") or # (1) REBUILD URLS_OF_URIS DB die "cannot open temporary index file: $idxfile\n"; - while ($line = ) { #parse index and add entry to urls_of_uris + while ($line = ) { # parse index and add entry to urls_of_uris db chomp $line; ($uri,$comp) = split /[ \t]+/, $line; # build url: @@ -697,12 +766,35 @@ sub mk_urls_of_uris { $url =~ s/theory:/$server/; $urls_of_uris{$uri} = $url; } - close INDEX; - die "cannot unlink temporary file: $idxfile\n" + + open (RDF_INDEX, "< $rdf_idxfile") or # (2) REBUILD RDF DB + die "cannot open temporary rdf index file: $rdf_idxfile\n"; + while ($line = ) { # parse index and add entry to rdf db + chomp $line; + ($uri,$comp) = split /[ \t]+/, $line; # comp is "gz" or nothing + # build url: + if ($comp =~ /gz/) { + $url = $uri . ".xml" . ".gz"; + } else { + $url = $uri . ".xml"; + } + $url =~ s/^helm:rdf(.*)\/\/cic:/$server/; + $url =~ s/^helm:rdf(.*)\/\/theory:/$server/; + $rdf{$uri} = $url; + } + + close INDEX; # close indexes + close RDF_INDEX; + die "cannot unlink temporary file: $idxfile\n" # remove temp files if (unlink $idxfile) != 1; + die "cannot unlink temporary file: $rdf_idxfile\n" + if (unlink $rdf_idxfile) != 1; + } - untie(%urls_of_uris); - tie(%map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); -} + untie(%urls_of_uris); # untie local proxies + untie(%rdf); + tie(%map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); # retie global ones + tie(%rdf_map, 'DB_File', $rdf_dbm.".db", O_RDWR, 0664); +} # update_dbs -- 2.39.2