X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fhttp_getter%2Fhttp_getter.pl.in;h=fd88194bf417b1860a675cd7c094cd5405cd1cae;hb=53ae76208da0cf33862c64e60b76a441e1d78863;hp=1daadef7b37c68a405bd025d1c51eb4c3b640f4e;hpb=2743bf654edf44411fb0c0a64bbe485c6bc5c864;p=helm.git diff --git a/helm/http_getter/http_getter.pl.in b/helm/http_getter/http_getter.pl.in index 1daadef7b..fd88194bf 100755 --- a/helm/http_getter/http_getter.pl.in +++ b/helm/http_getter/http_getter.pl.in @@ -43,23 +43,23 @@ if (defined ($HELM_LIB_DIR)) { $HELM_LIB_PATH = $DEFAULT_HELM_LIB_DIR."/configuration.pl"; } -# : TODO temporary, move this setting to configuration file -# set the cache mode, may be "gzipped" or "normal" -my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || 'gzipped'; -if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { - die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be". - "'normal' or 'gzipped'\n"; -} -# - # next require defines: $helm_dir, $html_link, $dtd_dir, $uris_dbm, $indexname require $HELM_LIB_PATH; # TEMP: TODO put these vars in configuration file configuration.xml # -$helm_rdf_dir = "/usr/local/helm/rdf"; -$rdf_dbm = "/usr/local/helm/rdf_urls"; -$rdf_indexname = "rdf_index.txt"; +my $cachemode = $ENV{'HTTP_GETTER_CACHE_MODE'} || + 'gzipped'; +if (($cachemode ne 'gzipped') and ($cachemode ne 'normal')) { + die "Invalid HTTP_GETTER_CACHE_MODE environment variable, must be". + "'normal' or 'gzipped'\n"; +} +my $helm_rdf_dir = $ENV{'HTTP_GETTER_RDF_DIR'} || + "/usr/local/helm/rdf_library"; +my $rdf_dbm = $ENV{'HTTP_GETTER_RDF_DBM'} || + "/usr/local/helm/rdf_urls_of_uris"; +my $rdf_indexname = $ENV{'HTTP_GETTER_RDF_INDEXNAME'} || + "rdf_index.txt"; # # Let's override the configuration file @@ -101,7 +101,8 @@ print "helm_dir: $helm_dir\n"; print "helm_rdf_dir: $helm_rdf_dir\n"; print "style_dir: $style_dir\n"; print "dtd_dir: $dtd_dir\n"; -print "urls_of_uris.db: $uris_dbm.db\n"; +print "urls_of_uris db: $uris_dbm.db\n"; +print "rdf db: $rdf_dbm.db\n"; print "cache mode: $cachemode\n"; print "indexname: $indexname\n"; print "rdf_indexname: $rdf_indexname\n"; @@ -109,6 +110,7 @@ print "\n"; $SIG{CHLD} = "IGNORE"; # do not accumulate defunct processes $SIG{USR1} = \&update; # sent by the child to make the parent update + while (my $c = $d->accept) { if (fork() == 0) { while (my $r = $c->get_request) { @@ -121,7 +123,7 @@ while (my $c = $d->accept) { my $inputuri = $cgi->param('uri'); print "\nRequest: ".$r->url."\n\n"; - print "\nUnescaped query: ".$http_query."\n"; + print "\nUnescaped query: ".$http_query."\n"; # "getxml" works with rdf uris if ($http_method eq 'GET' and $http_path eq "/getxml") { @@ -142,52 +144,56 @@ while (my $c = $d->accept) { } my $filename = $inputuri; + my $prefix; if (not isRdfUri($inputuri)) { # standad cic: or theory: uri $filename =~ s/^cic:(.*)/$1/; $filename =~ s/^theory:(.*)/$1/; } else { # rdf uri + # The "helm:rdf/<<...>>//cic:" prefix is kept, but quoted + # (so that a "helm:rdf/<<...>>//cic:" dir is created + $prefix = $filename; $filename =~ s/^(.*)\/\/cic:(.*)/$2/; $filename =~ s/^(.*)\/\/theory:(.*)/$2/; + $prefix =~ s/\Q$filename\E//; + $prefix =~ s/_/__/g; + $prefix =~ s/\//_/g; + $filename = $prefix.$filename; } my $url = resolve ($inputuri); # resolve uri in url - if (not defined($url)) { - die "uri \"$inputuri\" can't be resolved\n"; - } - - my $extension; # file extension - if ($url =~ /\.xml$/) { # non gzipped file - $extension = ".xml"; - } elsif ($url =~ /\.xml\.gz$/) { # gzipped file - $extension = ".xml.gz"; - } else { # error: unknown extension - die "unexpected extension in url: $url, might be '.xml'". - "or '.xml.gz'"; - } - - if (not isRdfUri ($inputuri)) { - $filename = $helm_dir.$filename.$extension; - } else { - $filename = $helm_rdf_dir.$filename.$extension; - } - - if (!defined($url)) { - print "\nNOT FOUND!!!!!\n"; + if (not defined($url)) { # uri not found in uri2url map + die "NOT FOUND: uri \"$inputuri\" can't be resolved\n"; $c->send_error(RC_NOT_FOUND) - } else { - print_request("cic",$inputuri,$url,$filename); - - # Retrieves the file - my $ciccontent = download($patch_dtd,"cic",$url,$filename,$answerformat); - - # Answering the client - if ($answerformat eq "normal") { - answer($c,$ciccontent,"text/xml",""); - } else { - answer($c,$ciccontent,"text/xml","x-gzip"); - } + } else { # uri found and mapped to url + my $extension; # file extension + if ($url =~ /\.xml$/) { # non gzipped file + $extension = ".xml"; + } elsif ($url =~ /\.xml\.gz$/) { # gzipped file + $extension = ".xml.gz"; + } else { # error: unknown extension + die "unexpected extension in url: $url, might be '.xml'". + "or '.xml.gz'"; + } + + if (not isRdfUri ($inputuri)) { # save in uri std cache dir + $filename = $helm_dir.$filename.$extension; + } else { # save in rdf metadata cache dir + $filename = $helm_rdf_dir."/".$filename.$extension; + } + + print_request($inputuri,$url,$filename); + + # Retrieves the file + my $ciccontent = download($patch_dtd,$url,$filename,$answerformat); + + if ($answerformat eq "normal") { # answer the client in text/xml + answer($c,$ciccontent,"text/xml",""); + } else { # answer the client in text/xml, gzip encoding + answer($c,$ciccontent,"text/xml","x-gzip"); + } } - # "/register" does not work with rdf uris + + # "/register" does not work with rdf uris } elsif ($http_method eq 'GET' and $http_path eq "/register") { my $inputurl = $cgi->param('url'); print "Register requested...\n"; @@ -204,9 +210,10 @@ while (my $c = $d->accept) { kill(USR1,getppid()); # signal changes to parent untie %map; - print "done\n"; + print "done.\n"; html_nice_answer($c,"Register done"); - # "/resolve" works with rdf uri + + # "/resolve" works with rdf uri } elsif ($http_method eq 'GET' and $http_path eq "/resolve") { #my $outputurl = $map{$inputuri}; my $outputurl = resolve($inputuri); @@ -218,6 +225,7 @@ while (my $c = $d->accept) { $cont .= "\n"; } answer($c,$cont,"text/xml",""); + } elsif ($http_method eq 'GET' and $http_path eq "/getdtd") { my $filename = $inputuri; $filename = $dtd_dir."/".$filename; @@ -237,6 +245,7 @@ while (my $c = $d->accept) { } else { die "Could not find DTD!"; } + } elsif ($http_method eq 'GET' and $http_path eq "/getstyleconf") { my $filename = $inputuri; $filename = $style_dir."/config/".$filename; @@ -252,6 +261,7 @@ while (my $c = $d->accept) { } else { die "Could not find Style Configuration File!"; } + } elsif ($http_method eq 'GET' and $http_path eq "/getxslt") { my $filename = $inputuri; $filename = $style_dir."/".$filename; @@ -272,6 +282,8 @@ while (my $c = $d->accept) { } else { die "Could not find XSLT!"; } + + # "/update" works with rdf uri } elsif ($http_method eq 'GET' and $http_path eq "/update") { # rebuild urls_of_uris db and rdf uris db print "Update requested...\n"; @@ -279,13 +291,15 @@ while (my $c = $d->accept) { kill(USR1,getppid()); # signal changes to parent print " done\n"; html_nice_answer($c,"Update done"); - # "/ls" does not work with rdf uris + + # "/ls" does not work with rdf uris } elsif ($http_method eq 'GET' and $http_path eq "/ls") { # send back keys that begin with a given uri my ($uritype,$uripattern,$uriprefix); my $baseuri = $cgi->param('baseuri'); chop $baseuri if ($baseuri =~ /.*\/$/); # remove trailing "/" - my $outype = $cgi->param('format'); # output type, might be 'txt' or 'xml' + # output type, might be 'txt' or 'xml': + my $outype = $cgi->param('format'); $uripattern = $baseuri; $uripattern =~ s/^.*:(.*)/$1/; if ($baseuri =~ /^cic:/) { @@ -317,10 +331,12 @@ while (my $c = $d->accept) { html_nice_answer($c,"Invalid uri , must begin with ". "'cic:' or 'theory:'"); } + } elsif ($http_method eq 'GET' and $http_path eq "/help") { # help request print "Help requested!\n"; html_nice_answer($c,"HTTP Getter Version: $VERSION"); + } elsif ($http_method eq 'GET' and $http_path =~ /\.cgi$/) { # cgi handling print "CGI requested!\n"; @@ -343,6 +359,7 @@ while (my $c = $d->accept) { html_nice_answer($c,"CGI '$http_path' not found ". "in CGI dir '$cgi_dir'"); } + } else { # unsupported request print "\n"; print "INVALID REQUEST!!!!!\n"; @@ -350,8 +367,10 @@ while (my $c = $d->accept) { print "QUERY: ",$http_query,")\n"; $c->send_error(RC_FORBIDDEN) } + print "\nRequest solved: ".$r->url."\n\n"; } + $c->close; undef($c); print "\nCONNECTION CLOSED\n\n"; @@ -364,12 +383,12 @@ while (my $c = $d->accept) { sub isRdfUri { # return true if the uri is an rdf uri, false otherwise # typycal rdf uri: # helm:rdf/cic:www.cs.unibo.it/helm/rdf/foo_schema//cic:\ -# /Coq/Init/Logic/True_rec.con.types.xml.gz +# /Coq/Init/Logic/True_rec.con.types # # the format is "helm:rdf/://" # my ($uri) = @_; - if ($uri =~ /^helm:rdf\/(.*):?(.*)\/\/(.*)/) { + if ($uri =~ /^helm:rdf(.*):(.*)\/\/(.*)/) { return 1; } else { return 0; @@ -378,10 +397,16 @@ sub isRdfUri { # return true if the uri is an rdf uri, false otherwise sub resolve { # resolve an uri in a url, work both with standard cic: or theory: # uris and rdf uris + print "RESOLVE subroutine\n"; my ($uri) = @_; + print "GIVEN URI: \"$uri\"\n"; if (isRdfUri ($uri)) { # rdf uri, resolve using rdf db + print "IS A RDF URI\n"; + print "I WILL RETURN '$rdf_map{$uri}'\n"; return ($rdf_map{$uri}); } else { # standard cic: or theory: uri, resolve using std uri db + print "IS NOT A RDF URI\n"; + print "I WILL RETURN '$map{$uri}'\n"; return ($map{$uri}); } } @@ -392,7 +417,7 @@ sub finduris { # find uris for cic and theory trees generation my ($uri,$localpart,$basepart,$dirname,$suffix,$flags,$key); my (@itemz,@already_pushed_dir); my (%objects,%dirs); # map uris to suffixes' flags - #my $debug=1; # for debug + my $debug=1; # for debug print "FINDURIS, uritype: $uritype, uripattern: $uripattern, ". "format: $format\n\n" if defined($debug); @@ -528,10 +553,12 @@ sub mkdirs sub print_request { - my ($str,$uri,$url,$filename) = @_; - print $str."uri: $uri\n"; - print $str."url: $url\n"; - print $str."filename: $filename\n\n"; + my ($uri,$url,$filename) = @_; + print "\n"; + print "uri: $uri\n"; + print "url: $url\n"; + print "filename: $filename\n\n"; + print "\n"; } sub callback @@ -572,10 +599,11 @@ sub gzip { # gzip the content argument and save it to filename argument } sub download { - my ($patch_dtd,$str,$url,$filename,$format) = @_; + my ($patch_dtd,$url,$filename,$format) = @_; my ($gz, $buffer); - #my $debug=1; # for DEBUG only +# print "DOWNLOAD subs receives url: \"$url\"\n"; +# print "DOWNLOAD subs receives filename: \"$filename\"\n"; my $resourcetype; # retrieve mode: "normal" (.xml) or "gzipped" (.xml.gz) if ($filename =~ /\.xml$/) { # set retrieve mode @@ -597,7 +625,7 @@ sub download { } if ($localfname ne "") { # we already have local copy of requested file # check both possible cache entry: gzipped or normal - print "Using local copy for the $str file\n"; + print "Using local copy.\n"; if ($localfname =~ /\.xml\.gz$/) { # deflating cached file and return it $cont = gunzip($localfname); } elsif ($localfname =~ /\.xml$/) { # just return cached file @@ -609,7 +637,7 @@ sub download { ."must end with '.gz' or '.xml.gz'\n"; } } else { # download file from net - print "Downloading the $str file\n"; # download file + print "Downloading file ...\n"; # download file my $ua = LWP::UserAgent->new; my $request = HTTP::Request->new(GET => "$url"); my $response = $ua->request($request, \&callback); @@ -617,18 +645,19 @@ sub download { # cache retrieved file to disk # TODO: inefficent, I haven't yet undestood how to deflate # in memory gzipped file, without call "gzopen" - print "Storing the $str file\n"; - print "Making dirs ...\n" if (defined($debug)); + print "Storing file ...\n"; +# print "Making dirs ...\n"; mkdirs($filename); - print "Opening tmp file for writing ...\n" if (defined($debug)); +# print "Opening tmp file for writing ...\n"; open(FD, ">".$filename.".tmp") or die "Cannot open $filename.tmp\n"; - print "Writing on tmp file ...\n" if (defined($debug)); +# print "Writing on tmp file ...\n"; print FD $cont; - print "Closing tmp file ...\n" if (defined($debug)); +# print "Closing tmp file ...\n"; close(FD); # handle cache conversion normal->gzipped or gzipped->normal as user choice - print "cachemode:$cachemode, resourcetype:$resourcetype\n" if (defined($debug)); + print "cachemode:$cachemode, resourcetype:$resourcetype\n" + if (defined($debug)); if (($cachemode eq 'normal') and ($resourcetype eq 'normal')) { # cache the file as is rename "$filename.tmp", $filename; @@ -713,10 +742,11 @@ sub helm_wget { } sub update { +# retie dbs untie %map; tie(%map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); untie %rdf_map; - tie(%rdf_map, 'DB_File', $uris_dbm.".db", O_RDWR, 0664); + tie(%rdf_map, 'DB_File', $rdf_dbm.".db", O_RDWR, 0664); } sub update_dbs { @@ -798,3 +828,6 @@ sub update_dbs { tie(%rdf_map, 'DB_File', $rdf_dbm.".db", O_RDWR, 0664); } # update_dbs +# vim modline: do not remove! +# vim: set ft=perl: +