]> matita.cs.unibo.it Git - helm.git/commitdiff
New framework for metadata generation.
authorClaudio Sacerdoti Coen <claudio.sacerdoticoen@unibo.it>
Mon, 23 Feb 2004 13:12:40 +0000 (13:12 +0000)
committerClaudio Sacerdoti Coen <claudio.sacerdoticoen@unibo.it>
Mon, 23 Feb 2004 13:12:40 +0000 (13:12 +0000)
NOTE: from this release on the metadata are inserted into a MySQL DB.

14 files changed:
helm/metadata/Makefile [new file with mode: 0644]
helm/metadata/dc/Makefile [new file with mode: 0644]
helm/metadata/dc/fill_db.pl [new file with mode: 0755]
helm/metadata/dc/sql/create_dc_tables.sql [new file with mode: 0644]
helm/metadata/dc/sql/drop_dc_tables.sql [new file with mode: 0644]
helm/metadata/extractor/.cvsignore [new file with mode: 0644]
helm/metadata/extractor/Makefile [new file with mode: 0644]
helm/metadata/extractor/README [new file with mode: 0644]
helm/metadata/extractor/meta_lex.l [new file with mode: 0644]
helm/metadata/extractor/meta_lex_ind.l [new file with mode: 0644]
helm/metadata/extractor/sthandler.c [new file with mode: 0644]
helm/metadata/extractor/sthandler.h [new file with mode: 0644]
helm/metadata/extractor/sthandler_ind.c [new file with mode: 0644]
helm/metadata/extractor/sthandler_ind.h [new file with mode: 0644]

diff --git a/helm/metadata/Makefile b/helm/metadata/Makefile
new file mode 100644 (file)
index 0000000..4708eb2
--- /dev/null
@@ -0,0 +1,6 @@
+all:
+       rm -f fill_db.sql
+       time for i in `cat tipi_induttivi.txt` ; do (cd tmp ; wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i") ; zcat tmp/downloaded.xml.gz > tmp/inductive_type.xml ; extractor/meta_ind $$i "tmp/inductive_type.xml" >> fill_db.sql ; rm -f tmp/downloaded.xml.gz tmp/inductive_type.xml; done > log 2>&1
+       time for i in `cat costanti_e_variabili.txt` ; do (cd tmp ; wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i" ; wget -t 1 -O downloaded_body.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i.body"); zcat tmp/downloaded.xml.gz > tmp/type.xml ; zcat tmp/downloaded_body.xml.gz > tmp/body.xml ; extractor/meta $$i "tmp/body.xml" "tmp/type.xml" >> fill_db.sql ; rm -f tmp/downloaded.xml.gz tmp/downloaded_body.xml.gz tmp/type.xml tmp/body.xml ; done > log 2>&1
+
+.PHONY: all
diff --git a/helm/metadata/dc/Makefile b/helm/metadata/dc/Makefile
new file mode 100644 (file)
index 0000000..c9a7c73
--- /dev/null
@@ -0,0 +1,32 @@
+
+SQL_DB = mowgli
+SQL_DIR = sql
+SQL_HOST = localhost
+SQL_PASSWORD = bjIcRpru
+SQL_USER = helmadmin
+DC_DIR = /projects/helm/library/dc/
+
+SQL_ENGINE = \
+       mysql -h $(SQL_HOST) -u $(SQL_USER) --password=$(SQL_PASSWORD) $(SQL_DB)
+
+all:
+       @echo "try one of:"
+       @echo "  make connect"
+       @echo "  make create_tables"
+       @echo "  make fill_db"
+       @echo "  make drop_tables"
+
+connect:
+       $(SQL_ENGINE)
+
+create_tables:
+       $(SQL_ENGINE) < $(SQL_DIR)/create_dc_tables.sql
+
+fill_db:
+       find $(DC_DIR) -name "*.xml" -exec ./fill_db.pl {} \; | $(SQL_ENGINE)
+
+drop_tables:
+       $(SQL_ENGINE) < $(SQL_DIR)/drop_dc_tables.sql
+
+.PHONY: all connect create_tables fill_db drop_tables
+
diff --git a/helm/metadata/dc/fill_db.pl b/helm/metadata/dc/fill_db.pl
new file mode 100755 (executable)
index 0000000..fed21ee
--- /dev/null
@@ -0,0 +1,53 @@
+#!/usr/bin/perl -w
+use strict;
+
+use XML::Parser;
+
+my $skipped = 0;
+my $open = 0;
+my $content = "";
+my $uri = "";
+
+sub handle_start($$@) {
+   my ($expat, $element, @attrs) = @_;
+   if ($skipped == 1) {
+      $uri = $attrs[1];
+      $uri =~ s/'/''/g;
+   }
+   if ($skipped < 2) { $skipped++ ; }
+   else {
+      $open++;
+      $content = "";
+   }
+}
+
+sub handle_end($$) {
+   my ($expat,$element) = @_;
+   $open--;
+   if ($open >= 0) {
+      $content =~ s/'/''/g;
+      $element =~ s/(\w+):(\w+)/$1$2/;
+      my $query = "INSERT INTO $element VALUES ('$content','$uri');";
+                       print $query, "\n";
+   }
+}
+
+sub handle_char($$) {
+   my ($expat,$char) = @_;
+   if ($open >= 1) {
+      chomp($char);
+      $char =~ s/ +/ /g;
+      $char =~ s/^ //g;
+      $char =~ s/ $//g;
+      $content .= $char;
+   }
+}
+
+my $p = new XML::Parser(
+  Handlers =>
+    { Start => \&handle_start,
+      End   => \&handle_end,
+      Char  => \&handle_char});
+
+$p->parsefile($ARGV[0]);
+
diff --git a/helm/metadata/dc/sql/create_dc_tables.sql b/helm/metadata/dc/sql/create_dc_tables.sql
new file mode 100644 (file)
index 0000000..ca53c5b
--- /dev/null
@@ -0,0 +1,58 @@
+
+create table dccreator (value varchar(255), uri varchar(255));
+create table dcdate (value varchar(255), uri varchar(255));
+create table dcdescription (value varchar(255), uri varchar(255));
+create table dcformat (value varchar(255), uri varchar(255));
+create table dcidentifier (value varchar(255), uri varchar(255));
+create table dclanguage (value varchar(255), uri varchar(255));
+create table dcpublisher (value varchar(255), uri varchar(255));
+create table dcqRelationType (value varchar(255), uri varchar(255));
+create table dcrelation (value varchar(255), uri varchar(255));
+create table dcrights (value varchar(255), uri varchar(255));
+create table dcsource (value varchar(255), uri varchar(255));
+create table dcsubject (value varchar(255), uri varchar(255));
+create table dctitle (value varchar(255), uri varchar(255));
+create table hthResourceFormat (value varchar(255), uri varchar(255));
+create table hthcontact (value varchar(255), uri varchar(255));
+create table hthfirstVersion (value varchar(255), uri varchar(255));
+create table hthinstitution (value varchar(255), uri varchar(255));
+create table hthmodified (value varchar(255), uri varchar(255));
+
+create index dccreator_index on dccreator (value);
+create index dcdate_index on dcdate (value);
+create index dcdescription_index on dcdescription (value);
+create index dcformat_index on dcformat (value);
+create index dcidentifier_index on dcidentifier (value);
+create index dclanguage_index on dclanguage (value);
+create index dcpublisher_index on dcpublisher (value);
+create index dcqRelationType_index on dcqRelationType (value);
+create index dcrelation_index on dcrelation (value);
+create index dcrights_index on dcrights (value);
+create index dcsource_index on dcsource (value);
+create index dcsubject_index on dcsubject (value);
+create index dctitle_index on dctitle (value);
+create index hthResourceFormat_index on hthResourceFormat (value);
+create index hthcontact_index on hthcontact (value);
+create index hthfirstVersion_index on hthfirstVersion (value);
+create index hthinstitution_index on hthinstitution (value);
+create index hthmodified_index on hthmodified (value);
+
+create index dccreator_rev_index on dccreator (uri);
+create index dcdate_rev_index on dcdate (uri);
+create index dcdescription_rev_index on dcdescription (uri);
+create index dcformat_rev_index on dcformat (uri);
+create index dcidentifier_rev_index on dcidentifier (uri);
+create index dclanguage_rev_index on dclanguage (uri);
+create index dcpublisher_rev_index on dcpublisher (uri);
+create index dcqRelationType_rev_index on dcqRelationType (uri);
+create index dcrelation_rev_index on dcrelation (uri);
+create index dcrights_rev_index on dcrights (uri);
+create index dcsource_rev_index on dcsource (uri);
+create index dcsubject_rev_index on dcsubject (uri);
+create index dctitle_rev_index on dctitle (uri);
+create index hthResourceFormat_rev_index on hthResourceFormat (uri);
+create index hthcontact_rev_index on hthcontact (uri);
+create index hthfirstVersion_rev_index on hthfirstVersion (uri);
+create index hthinstitution_rev_index on hthinstitution (uri);
+create index hthmodified_rev_index on hthmodified (uri);
+
diff --git a/helm/metadata/dc/sql/drop_dc_tables.sql b/helm/metadata/dc/sql/drop_dc_tables.sql
new file mode 100644 (file)
index 0000000..d13a2a8
--- /dev/null
@@ -0,0 +1,18 @@
+drop table dccreator;
+drop table dcdate;
+drop table dcdescription;
+drop table dcformat;
+drop table dcidentifier;
+drop table dclanguage;
+drop table dcpublisher;
+drop table dcqRelationType;
+drop table dcrelation;
+drop table dcrights;
+drop table dcsource;
+drop table dcsubject;
+drop table dctitle;
+drop table hthResourceFormat;
+drop table hthcontact;
+drop table hthfirstVersion;
+drop table hthinstitution;
+drop table hthmodified;
diff --git a/helm/metadata/extractor/.cvsignore b/helm/metadata/extractor/.cvsignore
new file mode 100644 (file)
index 0000000..1d5ca6b
--- /dev/null
@@ -0,0 +1,4 @@
+meta
+meta_ind
+lex.yy.c
+lex.yy_ind.c
diff --git a/helm/metadata/extractor/Makefile b/helm/metadata/extractor/Makefile
new file mode 100644 (file)
index 0000000..4b6f01a
--- /dev/null
@@ -0,0 +1,28 @@
+CC = gcc -Wall
+
+all: meta meta_ind
+
+meta: lex.yy.o sthandler.o
+       $(CC) lex.yy.o sthandler.o -lpq -o meta
+
+meta_ind: lex.yy_ind.o sthandler.o
+       $(CC) lex.yy_ind.o sthandler.o -lpq -o meta_ind
+
+lex.yy.c: meta_lex.l sthandler.h
+       flex meta_lex.l 
+
+lex.yy_ind.c: meta_lex_ind.l sthandler.h
+       flex -olex.yy_ind.c meta_lex_ind.l 
+
+sthandler.o: sthandler.c sthandler.h
+
+lex.yy.o: lex.yy.c sthandler.h
+       $(CC) -c lex.yy.c 
+
+lex.yy_ind.o: lex.yy_ind.c sthandler.h
+       $(CC) -c lex.yy_ind.c 
+
+clean:
+       -rm -f *.o 
+       -rm -f lex.yy.c lex.yy_ind.c
+       -rm -f meta meta_ind
diff --git a/helm/metadata/extractor/README b/helm/metadata/extractor/README
new file mode 100644 (file)
index 0000000..6c9fe0b
--- /dev/null
@@ -0,0 +1,9 @@
+Note:
+ - LetIn e Variabili con corpo: da pensarci (capita solamente una 30ina
+   di volte... per ora!) Per il momento ci mettiamo una pezza.
+ - Variabili: non consideriamo l'occorrenza di una variabile come una
+    vera occorrenza (perche' puo' essere istanziata). In ogni caso c'e'
+    l'attributo @params che fornisce questa informazione.
+ - META e IMPLICIT non trattati
+ - CAST non considerati ==> di default vado in ricorsione sia sul tipo
+   che sul corpo
diff --git a/helm/metadata/extractor/meta_lex.l b/helm/metadata/extractor/meta_lex.l
new file mode 100644 (file)
index 0000000..35bfcc9
--- /dev/null
@@ -0,0 +1,363 @@
+ /******************************************************************/
+ /*  Copyright (C) 2000, HELM Team                                 */ 
+ /*                                                                */
+ /* This file is part of HELM, an Hypertextual, Electronic         */
+ /* Library of Mathematics, developed at the Computer Science      */
+ /* Department, University of Bologna, Italy.                      */
+ /*                                                                */
+ /* HELM is free software; you can redistribute it and/or          */
+ /* modify it under the terms of the GNU General Public License    */
+ /* as published by the Free Software Foundation; either version   */
+ /* 2 of the License, or (at your option) any later version.       */
+ /*                                                                */
+ /* HELM is distributed in the hope that it will be useful,        */
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   */
+ /* GNU General Public License for more details.                   */
+ /*                                                                */
+ /* You should have received a copy of the GNU General Public      */
+ /* License along with HELM; if not, write to the Free Software    */
+ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,         */
+ /* MA  02111-1307, USA.                                           */
+ /*                                                                */
+ /* For details, see the HELM World-Wide-Web page,                 */
+ /* http://cs.unibo.it/helm/.                                      */
+ /******************************************************************/
+
+ /***************************************************************/
+ /*                       META_LEXAN                           */
+ /*                 Automatic Metadata Extractor                */
+ /*           First draft 11/12/2001, by Andrea Asperti         */
+ /*      more bugs added by domenico lordi on mon 12/17/2001    */
+ /***************************************************************/
+
+ /***************************************************************/
+ /* 1. Inclusion of header files.                              */
+ /***************************************************************/
+
+%{
+#include                <string.h>
+#include                <stdlib.h>
+#include                <sys/stat.h>
+#include                "sthandler.h"
+%}
+
+ /***************************************************************/
+ /* 2. Constants and Variables Definitions                      */
+ /***************************************************************/
+
+%{
+#define                 NOWHERE   0
+#define                 CONST     1
+#define                 MUTIND    2
+#define                 MUTCONSTRUCT  3
+#define                 SORT      4
+
+#define                 INBODY    0
+#define                 MAINHYP   1
+#define                 INHYP     2
+#define                 INCONCL   3
+#define                 MAINCONCL 4
+#define                 INTYPE    5
+#define                 NOTFOUND  6
+
+#define                 HERE      0     
+#define                 AFTER     1
+
+
+int                    where = NOWHERE;
+int                     found = NOTFOUND;
+int                     position = INBODY;
+int                     first_child = HERE;
+int                    skip = 0;     // boolean to skip the insertion of a URI
+int                     no_open_source = 0;
+int                     spine_depth = 0;
+int                     depth = 0;
+int                     tmp_n;
+char                    sep = '"';
+char                    *xpointer = "#xpointer(1/";
+char                    *uri;
+char                    *tmp;
+
+void search(char *uri, int first_child, int position, int depth);
+%}
+
+ /***************************************************************/
+ /* 3. Regular definitions.                                    */
+ /***************************************************************/
+
+uri                     [^"]+
+digits                  [0-9]+ 
+value                   [^"]+                  
+
+ /***************************************************************/
+ /* 4. Rules.                                                  */
+ /***************************************************************/
+
+
+%%
+
+"<Variable"[^>]*">"(" "|\n)*"<body" {
+                    position = INBODY; // Variables have both a body and a type
+                   }
+
+"</body>"(" "|\n)*"<type" {
+                    position = INTYPE; // Variables have both a body and a type
+                    first_child = HERE;
+                    no_open_source = 0;
+                    spine_depth = 0;
+                    depth = 0;   
+                   }
+
+"<decl"            |
+"<def"             {
+                    if (position == INTYPE)
+                       position = MAINHYP;
+                    else if (position == MAINHYP)
+                        { position = INHYP;
+                          no_open_source++;};
+                   }
+
+"</decl>"          |
+"</def>"           {
+                    if (position == INHYP)
+                     {
+                      no_open_source--;
+                      if (no_open_source == 0) 
+                        {
+                         position = MAINHYP;
+                         depth++;
+                         first_child = HERE;
+                        }
+                     }
+                    else if (position == MAINHYP)
+                      {
+                       position = INTYPE;
+                       spine_depth++;
+                       depth = 0;
+                       first_child = HERE;
+                      }
+                    /* bug? first_child = HERE; */
+                   }
+
+
+.|\n               {
+                   }
+
+"<LAMBDA"          |
+"<MUTCASE"         |
+"<FIX"             |
+"<COFIX"           { 
+                          first_child = AFTER;
+                   }
+
+"<REL"             {
+                    if (((position == INTYPE) | (position == MAINHYP)) &&
+                       (first_child == HERE))
+                     {
+                       if (position == INTYPE) /* REL on the spine */
+                         {
+                           position = INCONCL;
+                           search("Rel",first_child,position,spine_depth);
+                         }
+                       else search("Rel",first_child,position,depth);
+                       first_child = AFTER;
+                     }
+                   }
+
+"<SORT"(" "|\n)+"value=\""{value}   {         
+                    if (((position == INTYPE) | (position == MAINHYP)) &&
+                       (first_child == HERE))
+                     {
+                       tmp=(char *)malloc((sizeof('a')*200)); 
+                       strcpy(tmp,yytext);
+                       strsep(&tmp,&sep); 
+                       if (position == INTYPE) /* SORT on the spine */
+                         { 
+                           position = INCONCL;
+                           search(tmp,first_child,position,spine_depth);
+                         }
+                       else search(tmp,first_child,position,depth);
+                       first_child = AFTER;
+                     }
+                   }
+
+"<VAR"             {
+                     skip = 1;
+                     first_child = AFTER;
+                   }
+
+"<CONST"           { 
+                     if (position == INTYPE) /* CONST on the spine */
+                        position = INCONCL;
+                     where = CONST;
+                   }
+
+"<MUTIND"          { 
+                     if (position == INTYPE) /* MUTIND on the spine */
+                        position = INCONCL;
+                     where = MUTIND;
+                   }
+
+"<MUTCONSTRUCT"    { 
+                     if (position == INTYPE) /* MUTCONSTRUCT on the spine */
+                        position = INCONCL;
+                     where = MUTCONSTRUCT;
+                   }
+
+"uri=\""{uri}      {     
+                         if (!skip) {
+                            uri=(char *)malloc((sizeof('a')*200)); 
+                            strcpy(uri,yytext);
+                            strsep(&uri,&sep);
+                            if (where == CONST)
+                              {
+                                if (position == INCONCL)
+                                  search(uri,first_child,position,spine_depth);
+                                else search(uri,first_child,position,depth);
+                                where = NOWHERE;
+                                first_child = AFTER;
+                                free(uri); 
+                              };
+                         } else skip = 0;
+                   } 
+
+"noType=\""{digits} {
+                         if ((where == MUTIND) || (where == MUTCONSTRUCT))
+                          { strsep(&yytext,&sep);
+                            tmp=(char *)malloc((sizeof(sep)*(strlen(yytext)+1)));
+                            strcpy(tmp,yytext);
+                            tmp_n = atoi(tmp)+1;
+                            sprintf(tmp,"%d",tmp_n);
+                            strcat(uri,"#xpointer(1/"); 
+                            strcat(uri,tmp); 
+                          };
+                         if (where == MUTIND) 
+                             { 
+                               strcat(uri,")");
+                               if (position == INCONCL)
+                                  search(uri,first_child,position,spine_depth);
+                               else search(uri,first_child,position,depth);
+                               free(uri);
+                               free(tmp);
+                               where = NOWHERE; 
+                               first_child = AFTER;};
+                   } 
+
+"noConstr=\""{digits} {
+                         if (where == MUTCONSTRUCT)
+                          { strsep(&yytext,&sep);
+                            tmp=(char *)malloc((sizeof(sep)*(strlen(yytext)+1)));
+                            strcpy(tmp,yytext);
+                            strcat(uri,"/");
+                            strcat(uri,tmp);
+                            strcat(uri,")");
+                            if (position == INCONCL)
+                              search(uri,first_child,position,spine_depth);
+                            else search(uri,first_child,position,depth);
+                            free(uri);
+                            free(tmp);
+                            where = NOWHERE; 
+                            first_child = AFTER;};
+                   } 
+
+
+
+%%
+
+ /***************************************************************/
+ /* 6. Auxiliary functions.                                    */
+ /***************************************************************/
+
+int main(int argc, char *argv[])
+{                  
+    struct stat buf;
+
+    /* FILE *debug; */
+
+    if (argc != 4)
+    {
+        fprintf(stderr, "Usage: meta <object_uri> <body_file> <type_file>\n");
+        exit(1);
+    }
+
+
+    /* initialize the symbol table */
+    init_symbol_table();
+
+    // We process the body
+    if (!stat(argv[2],&buf)) 
+    {
+        yyin = fopen(argv[2], "r");
+        position = INBODY;
+        yylex();
+        fclose(yyin);
+     }
+
+    // We process the type
+    yyin = fopen(argv[3], "r");
+    position = INTYPE;
+    first_child = HERE;
+    no_open_source = 0;
+    spine_depth = 0;
+    depth = 0;
+    yylex(); 
+    fclose(yyin);
+    print_all(argv[1]);
+
+    return 0;
+} 
+
+
+void search(uri,first_child,position,depth)
+char               *uri;
+int                first_child;
+int                position; 
+{                  
+                   if (position == MAINHYP)
+                      { 
+                       if (first_child == HERE) 
+                           found = search_bucket(uri,MAINHYP,depth);
+                       else 
+                           found = search_bucket(uri,INHYP,0);
+                      }
+                   else if (position == INCONCL)
+                      { 
+                       if (first_child == HERE) 
+                           found = search_bucket(uri,MAINCONCL,depth);
+                       else
+                           found = search_bucket(uri,INCONCL,0);
+                      }
+                        
+                   else 
+                      found = search_bucket(uri,position,depth);
+                   /*
+                   if (found == NOTFOUND)
+                         fprintf(stderr,"here = %d, pos = %d, uri = %s\n", first_child,position, uri); */
+} 
+/*                  
+                      (first_child == HERE) 
+                      {
+                       if (position == MAINHYP)
+                          found = search_bucket(uri,MAINHYP,depth);
+                       else if (position == INCONCL)
+                          found = search_bucket(uri,MAINCONCL,0);
+                       else if (position == INHYP)
+                          found = search_bucket(uri,INHYP,0);
+                          if (found == NOTFOUND)
+                          printf( "pos = %d, uri = %s\n", MAINCONCL, uri); 
+                       }
+                   else if ((position == MAINHYP) && (first_child == AFTER))
+                        found = search_bucket(uri,INHYP,0);
+                   else found = search_bucket(uri,position,0);
+                   if (found == NOTFOUND)
+                         printf( "pos = %d, uri = %s\n", position, uri); 
+                   } */
+
+int yywrap() {
+               return 1;
+             }
+
+
+
+
diff --git a/helm/metadata/extractor/meta_lex_ind.l b/helm/metadata/extractor/meta_lex_ind.l
new file mode 100644 (file)
index 0000000..2f844f9
--- /dev/null
@@ -0,0 +1,357 @@
+ /******************************************************************/
+ /*  Copyright (C) 2000, HELM Team                                 */ 
+ /*                                                                */
+ /* This file is part of HELM, an Hypertextual, Electronic         */
+ /* Library of Mathematics, developed at the Computer Science      */
+ /* Department, University of Bologna, Italy.                      */
+ /*                                                                */
+ /* HELM is free software; you can redistribute it and/or          */
+ /* modify it under the terms of the GNU General Public License    */
+ /* as published by the Free Software Foundation; either version   */
+ /* 2 of the License, or (at your option) any later version.       */
+ /*                                                                */
+ /* HELM is distributed in the hope that it will be useful,        */
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   */
+ /* GNU General Public License for more details.                   */
+ /*                                                                */
+ /* You should have received a copy of the GNU General Public      */
+ /* License along with HELM; if not, write to the Free Software    */
+ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,         */
+ /* MA  02111-1307, USA.                                           */
+ /*                                                                */
+ /* For details, see the HELM World-Wide-Web page,                 */
+ /* http://cs.unibo.it/helm/.                                      */
+ /******************************************************************/
+
+ /***************************************************************/
+ /*                       META_LEXAN                           */
+ /*                 Automatic Metadata Extractor                */
+ /*           First draft 11/12/2001, by Andrea Asperti         */
+ /*      more bugs added by domenico lordi on mon 12/17/2001    */
+ /***************************************************************/
+
+ /***************************************************************/
+ /* 1. Inclusion of header files.                              */
+ /***************************************************************/
+
+%{
+#include                <string.h>
+#include                <stdlib.h>
+#include                <sys/stat.h>
+#include                "sthandler.h"
+%}
+
+ /***************************************************************/
+ /* 2. Constants and Variables Definitions                      */
+ /***************************************************************/
+
+%{
+#define                 NOWHERE   0
+#define                 CONST     1
+#define                 MUTIND    2
+#define                 MUTCONSTRUCT  3
+#define                 SORT      4
+
+#define                 INBODY    0
+#define                 MAINHYP   1
+#define                 INHYP     2
+#define                 INCONCL   3
+#define                 MAINCONCL 4
+#define                 INTYPE    5
+#define                 NOTFOUND  6
+
+#define                 HERE      0     
+#define                 AFTER     1
+
+
+int                    where = NOWHERE;
+int                     found = NOTFOUND;
+int                     position = INBODY;
+int                     first_child = HERE;
+int                    skip = 0;     // boolean to skip the insertion of a URI
+int                     no_open_source =0;
+int                     spine_depth = 0;
+int                     depth = 0;
+int                     tmp_n;
+int                     inductive_type = 0;
+int                     constructor = 0;
+int                     deep_type = 0;
+char                    sep = '"';
+char                    *xpointer = "#xpointer(1/";
+char                    *uri;
+char                    *tmp;
+char                    *source_uri; 
+char                    *source_uri_prefix;
+
+void search(char *uri, int first_child, int position, int depth);
+%}
+
+ /***************************************************************/
+ /* 3. Regular definitions.                                    */
+ /***************************************************************/
+
+uri                     [^"]+
+digits                  [0-9]+ 
+value                   [^"]+                  
+
+ /***************************************************************/
+ /* 4. Rules.                                                  */
+ /***************************************************************/
+
+
+%%
+
+"<InductiveType"   { 
+                     /* fprintf(stderr,"uno"); */
+                     init_symbol_table();
+                     no_open_source = 0;
+                     depth = 0;
+                     spine_depth = 0;
+                     /* fprintf(stderr,"due"); */
+                     inductive_type++;
+                     constructor=0;
+                     position = INTYPE;
+                     first_child = HERE;
+                   }
+
+"</arity>"         { tmp = (char *)malloc(sizeof('a')*128);
+                     strcpy(source_uri,source_uri_prefix);
+                     sprintf(tmp,"#xpointer(1/%d)", inductive_type);
+                     strcat(source_uri,tmp);
+                     /* fprintf(stderr,"cinque"); */
+                     free(tmp);
+                     print_all(source_uri);
+                     /* print_file(); */
+                   }
+
+"<Constructor"     { init_symbol_table();
+                     no_open_source = 0;
+                     depth = 0;
+                     spine_depth = 0;
+                     constructor++;
+                     position = INTYPE;
+                     first_child = HERE;
+                   }
+
+"</Constructor>"   { tmp = (char *)malloc(sizeof('a')*128);
+                     strcpy(source_uri,source_uri_prefix);
+                     sprintf(tmp,"#xpointer(1/%d/%d)",inductive_type,constructor);
+                     strcat(source_uri,tmp);
+                     free(tmp);
+                     print_all(source_uri);
+                     /* print_file(); */
+                   }
+
+"<decl"            |
+"<def"            {
+                    if (position == INTYPE)
+                       position = MAINHYP;
+                    else if (position == MAINHYP)
+                        { position = INHYP;
+                          no_open_source++;};
+                   }
+
+"</decl>"          |
+"</def>"           {
+                    if (position == INHYP)
+                     {
+                      no_open_source--;
+                      if (no_open_source == 0) 
+                        {
+                         position = MAINHYP;
+                         depth++;
+                         first_child = HERE;
+                        }
+                     }
+                    else if (position == MAINHYP)
+                      {
+                       position = INTYPE;
+                       spine_depth++;
+                       depth = 0;
+                       first_child = HERE;
+                      }
+                   }
+
+
+.|\n               {
+                   }
+
+"<LAMBDA"          |
+"<MUTCASE"         |
+"<FIX"             |
+"<COFIX"           { 
+                          first_child = AFTER;
+                   }
+
+"<REL"             {
+                    if (((position == INTYPE) | (position == MAINHYP)) &&
+                       (first_child == HERE))
+                     {
+                       if (position == INTYPE) /* REL on the spine */
+                         {
+                           position = INCONCL;
+                           search("Rel",first_child,position,spine_depth);
+                         }
+                       else search("Rel",first_child,position,depth);
+                       first_child = AFTER;
+                     }
+                   }
+
+"<SORT"(" "|\n)+"value=\""{value}   {         
+                    if (((position == INTYPE) | (position == MAINHYP)) &&
+                       (first_child == HERE))
+                     {
+                       tmp=(char *)malloc((sizeof('a')*200)); 
+                       strcpy(tmp,yytext);
+                       strsep(&tmp,&sep); 
+                       if (position == INTYPE) /* SORT on the spine */
+                         { 
+                           position = INCONCL;
+                           search(tmp,first_child,position,spine_depth);
+                         }
+                       else search(tmp,first_child,position,depth);
+                       first_child = AFTER;
+                     }
+                   }
+
+"<VAR"             {
+                     skip = 1;
+                     first_child = AFTER;
+                   }
+
+"<CONST"           { 
+                     if (position == INTYPE) /* CONST on the spine */
+                        position = INCONCL;
+                     where = CONST;
+                   }
+
+"<MUTIND"          { 
+                     if (position == INTYPE) /* MUTIND on the spine */
+                        position = INCONCL;
+                     where = MUTIND;
+                   }
+
+"<MUTCONSTRUCT"    { 
+                     if (position == INTYPE) /* MUTCONSTRUCT on the spine */
+                        position = INCONCL;
+                     where = MUTCONSTRUCT;
+                   }
+
+"uri=\""{uri}      {     
+                         if (!skip) {
+                            uri=(char *)malloc((sizeof('a')*200)); 
+                            strcpy(uri,yytext);
+                            strsep(&uri,&sep);
+                            if (where == CONST)
+                              {
+                                if (position == INCONCL)
+                                  search(uri,first_child,position,spine_depth);
+                                else search(uri,first_child,position,depth);
+                                where = NOWHERE;
+                                first_child = AFTER;
+                                free(uri); 
+                              };
+                         } else skip = 0;
+                   } 
+
+"noType=\""{digits} {
+                         if ((where == MUTIND) || (where == MUTCONSTRUCT))
+                          { strsep(&yytext,&sep);
+                            tmp=(char *)malloc((sizeof(sep)*(strlen(yytext)+1)));
+                            strcpy(tmp,yytext);
+                            tmp_n = atoi(tmp)+1;
+                            sprintf(tmp,"%d",tmp_n);
+                            strcat(uri,"#xpointer(1/"); 
+                            strcat(uri,tmp); 
+                          };
+                         if (where == MUTIND) 
+                             { 
+                               strcat(uri,")");
+                               if (position == INCONCL)
+                                  search(uri,first_child,position,spine_depth);
+                               else search(uri,first_child,position,depth);
+                               free(uri);
+                               free(tmp);
+                               where = NOWHERE; 
+                               first_child = AFTER;};
+                   } 
+
+"noConstr=\""{digits} {
+                         if (where == MUTCONSTRUCT)
+                          { strsep(&yytext,&sep);
+                            tmp=(char *)malloc((sizeof(sep)*(strlen(yytext)+1)));
+                            strcpy(tmp,yytext);
+                            strcat(uri,"/");
+                            strcat(uri,tmp);
+                            strcat(uri,")");
+                            if (position == INCONCL)
+                              search(uri,first_child,position,spine_depth);
+                            else search(uri,first_child,position,depth);
+                            free(uri);
+                            free(tmp);
+                            where = NOWHERE; 
+                            first_child = AFTER;};
+                   } 
+
+
+
+%%
+
+ /***************************************************************/
+ /* 6. Auxiliary functions.                                    */
+ /***************************************************************/
+
+int main(int argc, char *argv[])
+{
+    /* FILE *debug; */
+
+    if (argc != 3)
+    {
+        fprintf(stderr, "Usage: meta_ind <object_uri> <inductive_type_file>\n");
+        exit(1);
+    }
+
+    source_uri = malloc((sizeof('a')*2000));
+    source_uri_prefix=argv[1];
+    /* fprintf(stderr,"qua"); */
+    yyin = fopen(argv[2], "r");
+    yylex();
+
+    return 0;
+}
+
+void search(uri,first_child,position,depth)
+char               *uri;
+int                first_child;
+int                position; 
+{                  
+                   if (position == MAINHYP)
+                      { 
+                       if (first_child == HERE) 
+                           found = search_bucket(uri,MAINHYP,depth);
+                       else 
+                           found = search_bucket(uri,INHYP,0);
+                      }
+                   else if (position == INCONCL)
+                      { 
+                       if (first_child == HERE) 
+                           found = search_bucket(uri,MAINCONCL,depth);
+                       else
+                           found = search_bucket(uri,INCONCL,0);
+                      }
+                        
+                   else 
+                      found = search_bucket(uri,position,depth);
+                   /* if (found == NOTFOUND)
+                         printf( "pos = %d, uri = %s\n", position, uri);  */
+}
+
+
+int yywrap() {
+               return 1;
+             }
+
+
+
+
diff --git a/helm/metadata/extractor/sthandler.c b/helm/metadata/extractor/sthandler.c
new file mode 100644 (file)
index 0000000..6f87cdf
--- /dev/null
@@ -0,0 +1,453 @@
+/*********************************************************************/
+/*  Copyright (C) 2000, HELM Team                                    */ 
+/*                                                                   */
+/* This file is part of HELM, an Hypertextual, Electronic            */
+/* Library of Mathematics, developed at the Computer Science         */
+/* Department, University of Bologna, Italy.                         */
+/*                                                                   */
+/* HELM is free software; you can redistribute it and/or             */
+/* modify it under the terms of the GNU General Public License       */
+/* as published by the Free Software Foundation; either version 2    */
+/* of the License, or (at your option) any later version.            */
+/*                                                                   */
+/* HELM is distributed in the hope that it will be useful,           */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of    */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the     */
+/* GNU General Public License for more details.                      */
+/*                                                                   */
+/* You should have received a copy of the GNU General Public License */
+/* along with HELM; if not, write to the Free Software               */
+/* Foundation, Inc., 59 Temple Place - Suite 330, Boston,            */
+/* MA  02111-1307, USA.                                              */
+/*                                                                   */
+/* For details, see the HELM World-Wide-Web page,                    */
+/* http://cs.unibo.it/helm/.                                         */
+ /*********************************************************************/
+
+/****************************************************************/
+/*                       STHANDLER.C                           */
+/****************************************************************/
+/* This module supplies routines for symbol table handling.    */
+/* - init_symbol_table(): it initializes the symbol table      */
+/*                       to void.                              */
+/* - search_bucket(): it searches the symbol table for the     */
+/*                   bucket containing a given identifier, and */
+/*                   inserts it if it is not present;          */
+/****************************************************************/
+/*           First draft 11/12/2001, by Andrea Asperti          */
+/****************************************************************/
+
+/****************************************************************/
+/* 1. Inclusion of header files.                               */
+/****************************************************************/
+
+#include               <stdio.h>
+#include               <malloc.h>
+#include               <string.h>
+
+/****************************************************************/
+/* 2. Declarations                                              */
+/****************************************************************/
+
+
+#define         DICTSIZE                        211
+#define         HASH1                           4
+#define         HASH2                           0xf0000000
+#define         HASH3                           28
+#define         EOS                             '\0'
+
+#define                 INBODY    0
+#define                 MAINHYP   1
+#define                 INHYP     2
+#define                 INCONCL   3
+#define                 MAINCONCL 4
+#define                 INTYPE    5
+#define                 NOTFOUND  6
+
+/****************************************************************/
+/* 3. Types.                                                   */
+/****************************************************************/
+
+struct int_list {
+               int                     val;
+                struct int_list                *next;
+                  }; 
+
+struct st_bucket {
+               char                    *id;
+                                                /* identifier */
+                int                     main_depth;
+                struct int_list                *depths;
+               struct st_bucket        *next_st_bucket;
+                                               /* next bucket in the list */
+                struct st_bucket       *all_next;
+                                               /* all buckets in symbol
+                                                  table are linked together */
+                int                     pos[5];
+
+                  };                              
+
+struct st_bucket    *dictionary[DICTSIZE];
+                              /* pointers to bucket lists */
+
+/****************************************************************/
+/* 4. Local functions.                                         */
+/****************************************************************/
+struct int_list  *add(struct int_list  *,int);
+void allocate_bucket(struct st_bucket **st, char *id, int where);
+void print_mainhyp(char *about, char *uri,struct int_list *l);
+void print_mainconcl(char *about, char *uri, int depth);
+void move_bucket(struct st_bucket *st, int dict_index);
+void print_one(char *about, char *uri, int pos);
+int hash_pjw(char *id);
+
+/* This function is copied from the file fe-exec.c of PostgreSQL. */
+/* Copyright (c) 1996-2003, PostgreSQL Global Development Group   */
+/* Copyright (c) 1994, Regents of the University of California    */
+size_t
+PQescapeString(char *to, const char *from, size_t length)
+{
+  const char *source = from;
+  char     *target = to;
+  size_t    remaining = length;
+                                                                                
+  while (remaining > 0 && *source != '\0')
+  {
+    switch (*source)
+    {
+      case '\\':
+        *target++ = '\\';
+        *target++ = '\\';
+        break;
+                                                                                
+      case '\'':
+        *target++ = '\'';
+        *target++ = '\'';
+        break;
+                                                                                
+      default:
+        *target++ = *source;
+        break;
+    }
+    source++;
+    remaining--;
+  }
+
+  /* Write the terminating NUL character. */
+  *target = '\0';
+                                                                                
+  return target - to;
+}
+
+
+/****************************************************************/
+/* 5. Definitions of functions to be exported.                 */
+/****************************************************************/
+
+struct st_bucket       *all;
+
+ /* The following function initializes the symbol table to NULL */
+void init_symbol_table()
+{
+       int                     i;
+
+       /* initialize the dictionary */
+       for (i = 0; i < DICTSIZE; i++)
+               dictionary[i] = NULL;
+        all = NULL;
+}
+
+ /* The following function searches the symbol table for an identifier */
+ /* and inserts it if it is not present. */
+ /* The bucket associated with the given identifier */
+ /* becomes the first one in its list. */
+
+int search_bucket(id, where, depth)
+       char            *id;
+                                       /* identifier */
+        int             where;
+        int             depth;
+{
+       int             dict_index;
+                                       /* value returned by the */
+                                       /* hash function */
+       struct st_bucket
+                       *prev,
+                       *curr;
+
+        struct st_bucket *st;
+
+        /* apply the hash function */
+        dict_index = hash_pjw(id); 
+        /* fprintf(stderr,"%d\n", dict_index); */
+        
+        /* scan the bucket list indicated by the hash function */
+        prev = curr = dictionary[dict_index];
+        while ((curr != NULL) && (strcmp(id, curr->id)))
+          {
+            prev = curr;
+            curr = curr->next_st_bucket;
+          }
+       if (curr == NULL)
+          /* the identifier is not in the list */
+          {
+            allocate_bucket(&st,id,where);
+            if (where == MAINCONCL)
+             st->main_depth = depth;
+            else if (where == MAINHYP)
+              st->depths = add(st->depths,depth);
+           move_bucket(st,dict_index);
+            return NOTFOUND;
+          }
+       else
+          /*
+            printf("uno=%s\n", id);
+            printf("st=%s\n", curr->id); fflush(stdout) */
+
+          /* the identifier is already in the list */
+          {
+            /* st = curr; */
+            curr->pos[where] = 1;
+            if (where >= 1) 
+             curr->pos[INBODY] = 0; /* it will never be set again to 1 */
+            if (where == MAINHYP)
+              curr->depths=add(curr->depths,depth); 
+           else if (where == MAINCONCL)
+             curr->main_depth = depth; 
+            if (prev != curr)
+              /* the identifier is not in the first position */
+              {
+                prev->next_st_bucket = curr->next_st_bucket;
+                move_bucket(curr,dict_index);
+              };
+            return where;
+          }
+}
+
+void print_all(about,conn)
+     char       *about;
+{
+
+        int i;
+        struct st_bucket *curr;
+        curr = all;
+        while (curr != NULL)
+         {
+            for (i = 0; i < 5; ++i)
+             if ((curr->pos[i]) == 1)
+               {
+                 if (i == MAINHYP)
+                   print_mainhyp(about,curr->id,curr->depths);
+                  else if (i == MAINCONCL)
+                    print_mainconcl(about,curr->id,curr->main_depth);
+                 else
+                   print_one(about,curr->id,i);
+               }
+            curr = curr->all_next;
+          }
+}
+
+
+/****************************************************************/
+/* 5. Definitions of functions local to the module.            */
+/****************************************************************/
+
+struct int_list  *add(l,m)
+     struct int_list    *l;
+     int               m;
+{
+       struct int_list *curr;
+        /* scan the list looking for m */
+        curr = l;
+        while ((curr != NULL) && (m != (curr->val)))
+            curr = curr->next;
+       if (curr == NULL)
+          /* m is not in the list */
+          {
+            curr = (struct int_list *)malloc(sizeof(struct int_list));
+            curr->val = m;
+           curr->next = l;
+            return curr;
+          }
+       else
+       return l;
+       
+}
+
+void print_mainhyp(about,uri,l)
+     char                *about;
+     char                *uri;
+     struct int_list     *l;
+{
+    struct int_list *curr;
+    curr = l;
+    if (!strcmp(uri,"Rel"))
+      {
+        /* scan the list */
+        while (curr != NULL)
+         {
+            size_t len = strlen(about) + 1;
+            char *qabout = malloc (sizeof(char) * len * 2);
+            PQescapeString(qabout,about,len);
+            printf("INSERT INTO refRel VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d);\n",qabout,curr->val);
+            curr = curr->next;
+         }
+      }
+   else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) ||
+            (!strcmp(uri,"Set")))
+      {
+        /* scan the list */
+        while (curr != NULL)
+         {
+            size_t len = strlen(about) + 1;
+            char *qabout = malloc (sizeof(char) * len * 2);
+            PQescapeString(qabout,about,len);
+            printf("INSERT INTO refSort VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d, '%s');\n",qabout,curr->val,uri);
+            curr = curr->next;
+         }
+      }
+    else 
+     {
+        /* scan the list */
+        while (curr != NULL)
+         {
+            size_t len = strlen(about) + 1;
+            char *qabout = malloc (sizeof(char) * len * 2);
+            char *quri;
+            PQescapeString(qabout,about,len);
+            len = strlen(uri) + 1;
+            quri = malloc (sizeof(char) * len * 2);
+            PQescapeString(quri,uri,len);
+            printf("INSERT INTO refObj VALUES ('%s', '%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d);\n",qabout,quri,curr->val);
+            curr = curr->next;
+         }
+      }
+}
+
+void print_mainconcl(about,uri,depth)
+     char    *about;
+     char    *uri;
+     int     depth;
+     
+{
+    /* fprintf(stderr,"about = %s\n",about); */
+    if (!strcmp(uri,"Rel"))
+      { 
+        size_t len = strlen(about) + 1;
+        char *qabout = malloc (sizeof(char) * len * 2);
+        PQescapeString(qabout,about,len);
+        printf("INSERT INTO refRel VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d);\n",qabout,depth);
+      }
+    else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) ||
+            (!strcmp(uri,"Set")))
+      {
+        size_t len = strlen(about) + 1;
+        char *qabout = malloc (sizeof(char) * len * 2);
+        PQescapeString(qabout,about,len);
+        printf("INSERT INTO refSort VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d, '%s');\n",qabout,depth,uri);
+      }
+    else
+      {
+        size_t len = strlen(about) + 1;
+        char *qabout = malloc (sizeof(char) * len * 2);
+        char *quri;
+        PQescapeString(qabout,about,len);
+        len = strlen(uri) + 1;
+        quri = malloc (sizeof(char) * len * 2);
+        PQescapeString(quri,uri,len);
+        printf("INSERT INTO refObj VALUES ('%s', '%s','http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d);\n",qabout,quri,depth); 
+      }
+}
+
+// dome: cambiata per usare il modello con position
+void print_one(about,uri,pos)
+     char    *about,
+             *uri;
+     int     pos;
+{
+    char       *position = (char *)malloc((sizeof('a')*20));
+    size_t len = strlen(about) + 1;
+    char *qabout = malloc (sizeof(char) * len * 2);
+    char *quri;
+    PQescapeString(qabout,about,len);
+    len = strlen(uri) + 1;
+    quri = malloc (sizeof(char) * len * 2);
+    PQescapeString(quri,uri,len);
+    if (pos == INBODY)
+       position="InBody";
+    else if (pos == MAINHYP)
+       position="MainHypothesis";  /* This should never happen */
+    else if (pos == INHYP)
+       position="InHypothesis";
+    else if (pos == INCONCL)
+       position="InConclusion";
+    else if (pos == MAINCONCL)
+       position="MainConclusion";  /* This should never happen */
+    printf("INSERT INTO refObj VALUES ('%s', '%s', \
+    'http://www.cs.unibo.it/helm/schemas/schema-helm#%s', NULL);\n",qabout,quri,position);
+}
+
+ /* The following function allocates a bucket for an identifier. */
+void allocate_bucket(st, id, where)
+       struct st_bucket
+                        **st;
+                                       /* pointer to the bucket to be */
+                                       /* allocated */
+       char            *id;
+                                       /* identifier */
+        int             where;
+{
+        int i;
+
+       *st = (struct st_bucket *)malloc(sizeof(struct st_bucket));
+       (*st)->id = (char *)malloc(sizeof('a')*(strlen(id) + 1));
+        strcpy((*st)->id,id);
+        (*st)->main_depth = 0;
+        (*st)->depths = NULL;
+       (*st)->next_st_bucket = NULL;
+        (*st)->all_next = all;
+        all = *st;
+        for (i = 0; i < 5; ++i)
+         (*st)->pos[i] = 0;
+        (*st)->pos[where] = 1;
+}
+
+ /* The following function moves a bucket to the head of the */
+ /* list in which it lies. */
+void move_bucket(st, dict_index)
+       struct st_bucket 
+                        *st;
+                                       /* pointer to the bucket to */
+                                       /* be moved */
+       int             dict_index;
+                                       /* index corresponding to */
+                                       /* the list in which the */
+                                       /* bucket lies */
+{
+       st->next_st_bucket = dictionary[dict_index];
+       dictionary[dict_index] = st;
+}
+
+ /* The following function implements Weinberger's hash function. */
+int
+hash_pjw(id)
+       char            *id;
+                                       /* identifier to be hashed */
+{
+       unsigned        h,
+                       g;
+
+       for (h = 0; *id != EOS; id++)
+       {
+               h = (h << HASH1) + (*id);
+               if ((g = h) & HASH2)
+                       h = h ^ (g >> HASH3) ^ g;
+       }
+       return(h % DICTSIZE);
+}
+
+
+
+
+
+
diff --git a/helm/metadata/extractor/sthandler.h b/helm/metadata/extractor/sthandler.h
new file mode 100644 (file)
index 0000000..b0305d1
--- /dev/null
@@ -0,0 +1,8 @@
+/****************************************************************/
+/*                          STHANDLER.H                        */
+/****************************************************************/
+
+            
+extern  void            init_symbol_table();
+extern  void            print_all(char *);
+extern  int             search_bucket(char *, int, int);
diff --git a/helm/metadata/extractor/sthandler_ind.c b/helm/metadata/extractor/sthandler_ind.c
new file mode 100644 (file)
index 0000000..ee035ef
--- /dev/null
@@ -0,0 +1,424 @@
+/*********************************************************************/
+/*  Copyright (C) 2000, HELM Team                                    */ 
+/*                                                                   */
+/* This file is part of HELM, an Hypertextual, Electronic            */
+/* Library of Mathematics, developed at the Computer Science         */
+/* Department, University of Bologna, Italy.                         */
+/*                                                                   */
+/* HELM is free software; you can redistribute it and/or             */
+/* modify it under the terms of the GNU General Public License       */
+/* as published by the Free Software Foundation; either version 2    */
+/* of the License, or (at your option) any later version.            */
+/*                                                                   */
+/* HELM is distributed in the hope that it will be useful,           */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of    */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the     */
+/* GNU General Public License for more details.                      */
+/*                                                                   */
+/* You should have received a copy of the GNU General Public License */
+/* along with HELM; if not, write to the Free Software               */
+/* Foundation, Inc., 59 Temple Place - Suite 330, Boston,            */
+/* MA  02111-1307, USA.                                              */
+/*                                                                   */
+/* For details, see the HELM World-Wide-Web page,                    */
+/* http://cs.unibo.it/helm/.                                         */
+ /*********************************************************************/
+
+/****************************************************************/
+/*                       STHANDLER.C                           */
+/****************************************************************/
+/* This module supplies routines for symbol table handling.    */
+/* - init_symbol_table(): it initializes the symbol table      */
+/*                       to void.                              */
+/* - search_bucket(): it searches the symbol table for the     */
+/*                   bucket containing a given identifier, and */
+/*                   inserts it if it is not present;          */
+/****************************************************************/
+/*           First draft 11/12/2001, by Andrea Asperti          */
+/****************************************************************/
+
+/****************************************************************/
+/* 1. Inclusion of header files.                               */
+/****************************************************************/
+
+#include               <stdio.h>
+#include               <malloc.h>
+
+/****************************************************************/
+/* 2. Declarations                                              */
+/****************************************************************/
+
+
+#define         DICTSIZE                        211
+#define         HASH1                           4
+#define         HASH2                           0xf0000000
+#define         HASH3                           24
+#define         EOS                             '\0'
+
+#define                 INBODY    0
+#define                 MAINHYP   1
+#define                 INHYP     2
+#define                 INCONCL   3
+#define                 MAINCONCL 4
+#define                 INTYPE    5
+#define                 NOTFOUND  6
+
+/****************************************************************/
+/* 3. Types.                                                   */
+/****************************************************************/
+
+struct int_list {
+               int                     val;
+                struct int_list                *next;
+                  }; 
+
+struct st_bucket {
+               char                    *id;
+                                                /* identifier */
+                int                     main_depth;
+                struct int_list                *depths;
+               struct st_bucket        *next_st_bucket;
+                                               /* next bucket in the list */
+                struct st_bucket       *all_next;
+                                               /* all buckets in symbol
+                                                  table are linked together */
+                int                     pos[5];
+
+                  };                              
+
+struct st_bucket    *dictionary[DICTSIZE];
+                              /* pointers to bucket lists */
+
+/****************************************************************/
+/* 4. Local functions.                                         */
+/****************************************************************/
+struct int_list  *add(struct int_list  *,int);
+int hash_pjw(char *id);
+
+/****************************************************************/
+/* 5. Definitions of functions to be exported.                 */
+/****************************************************************/
+
+struct st_bucket       *all;
+
+ /* The following function initializes the symbol table to NULL */
+void init_symbol_table()
+{
+       struct st_bucket        *st;
+       int                     i;
+
+       /* initialize the dictionary */
+       for (i = 0; i < DICTSIZE; i++)
+               dictionary[i] = NULL;
+        all = NULL;
+}
+
+ /* The following function searches the symbol table for an identifier */
+ /* and inserts it if it is not present. 
+ /* The bucket associated with the given identifier */
+ /* becomes the first one in its list. */
+
+search_bucket(id, where, depth)
+       char            *id;
+                                       /* identifier */
+        int             where;
+        int             depth;
+{
+       int             dict_index;
+                                       /* value returned by the */
+                                       /* hash function */
+       struct st_bucket
+                       *prev,
+                       *curr;
+
+        struct st_bucket *st;
+
+        /* apply the hash function */
+        dict_index = hash_pjw(id);
+        /* fprintf(stderr,"%d\n", dict_index); fflush(stdout); */
+        
+        /* scan the bucket list indicated by the hash function */
+        prev = curr = dictionary[dict_index];
+        while ((curr != NULL) && (strcmp(id, curr->id)))
+          {
+            prev = curr;
+            curr = curr->next_st_bucket;
+          }
+       if (curr == NULL)
+          /* the identifier is not in the list */
+          {
+            allocate_bucket(&st,id,where);
+            if (where == MAINCONCL)
+             st->main_depth = depth;
+            else if (where == MAINHYP)
+              st->depths = add(st->depths,depth);
+           move_bucket(st,dict_index);
+            return NOTFOUND;
+          }
+       else
+          /*
+            fprintf(stderr,"uno=%s\n", id);
+            fprintf(stderr,"st=%s\n", curr->id); fflush(stdout) */
+
+          /* the identifier is already in the list */
+          {
+            /* st = curr; */
+            curr->pos[where] = 1;
+            if (where >= 1) 
+             curr->pos[INBODY] = 0; /* it will never be set again to 1 */
+            if (where == MAINHYP)
+              curr->depths=add(curr->depths,depth); 
+           else if (where == MAINCONCL)
+             curr->main_depth = depth; 
+            if (prev != curr)
+              /* the identifier is not in the first position */
+              {
+                prev->next_st_bucket = curr->next_st_bucket;
+                move_bucket(curr,
+                            dict_index);
+              };
+            return where;
+          }
+}
+
+print_all(about,out,outrel,outsort)
+     char       *about;
+     FILE       *out,
+                *outrel,
+                *outsort;
+{
+
+        int i;
+        struct st_bucket *curr;
+        curr = all;
+        while (curr != NULL)
+         {
+            for (i = 0; i < 5; ++i)
+             if ((curr->pos[i]) == 1)
+               {
+                 if (i == MAINHYP)
+                   print_mainhyp(about,out,outrel,outsort,curr->id,curr->depths);
+                  else if (i == MAINCONCL)
+                    print_mainconcl(about,out,outrel,outsort,curr->id,curr->main_depth);
+                 else
+                   print_one(out,curr->id,i);
+               }
+            curr = curr->all_next;
+          }
+}
+
+
+/****************************************************************/
+/* 5. Definitions of functions local to the module.            */
+/****************************************************************/
+
+struct int_list  *add(l,m)
+     struct int_list    *l;
+     int               m;
+{
+       struct int_list *curr;
+        /* scan the list looking for m */
+        curr = l;
+        while ((curr != NULL) && (m != (curr->val)))
+            curr = curr->next;
+       if (curr == NULL)
+          /* m is not in the list */
+          {
+            curr = (struct int_list *)malloc(sizeof(struct int_list));
+            curr->val = m;
+           curr->next = l;
+            return curr;
+          }
+       else
+       return l;
+       
+}
+
+print_mainhyp(about,out,outrel,outsort,uri,l)
+     char                *about;
+     FILE                *out,
+                         *outrel,
+                         *outsort;
+     char                *uri;
+     struct int_list     *l;
+{
+    struct int_list *curr;
+    curr = l;
+    if (!strcmp(uri,"Rel"))
+      {
+        /* scan the list */
+        while (curr != NULL)
+         {
+            fprintf(outrel,"\t<h:Object rdf:about=\"");
+            fprintf(outrel,"%s",about);
+            fprintf(outrel,"\">\n");
+           fprintf(outrel,"\t\t<h:refRel rdf:parseType=\"Resource\">");
+            fprintf(outrel,"\n\t\t\t\t<h:position rdf:resource=\"&hns;MainHypothesis\"/>");
+            fprintf(outrel,"\n\t\t\t\t<h:depth>%d</h:depth>",curr->val);
+            fprintf(outrel,"\n\t\t</h:refRel>\n");
+           fprintf(outrel,"\t</h:Object>\n");
+            curr = curr->next;
+         }
+      }
+   else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) ||
+            (!strcmp(uri,"Set")))
+      {
+        /* scan the list */
+        while (curr != NULL)
+         {
+            fprintf(outsort,"\t<h:Object rdf:about=\"");
+            fprintf(outsort,"%s",about);
+            fprintf(outsort,"\">\n");
+            fprintf(outsort,"\t\t<h:refSort rdf:parseType=\"Resource\">");
+            fprintf(outsort,"\n\t\t\t\t<h:position rdf:resource=\"&hns;MainHypothesis\"/>");
+            fprintf(outsort,"\n\t\t\t\t<h:sort rdf:resource=\"&hns;%s\"/>",uri);
+            fprintf(outsort,"\n\t\t\t\t<h:depth>%d</h:depth>",curr->val);
+            fprintf(outsort,"\n\t\t</h:refSort>\n");
+           fprintf(outsort,"\t</h:Object>\n");
+            curr = curr->next;
+         }
+      }
+    else 
+     {
+        /* scan the list */
+        while (curr != NULL)
+         {
+           fprintf(out,"\t\t<h:refObj rdf:parseType=\"Resource\">");
+            fprintf(out,"\n\t\t\t\t<h:position rdf:resource=\"&hns;MainHypothesis\"/>");
+            fprintf(out,"\n\t\t\t\t<h:depth>%d</h:depth>",curr->val);
+            fprintf(out,"\n\t\t\t\t<h:occurrence><h:Object rdf:about=\"%s\"/></h:occurrence>",uri);
+            fprintf(out,"\n\t\t</h:refObj>\n");
+            curr = curr->next;
+         }
+      }
+}
+
+print_mainconcl(about,out,outrel,outsort,uri,depth)
+     char    *about;
+     FILE    *out,
+             *outrel,
+             *outsort;
+     char    *uri;
+     int     depth;
+     
+{
+    if (!strcmp(uri,"Rel"))
+      { 
+        fprintf(outrel,"\t<h:Object rdf:about=\"");
+        fprintf(outrel,"%s",about);
+        fprintf(outrel,"\">\n");
+       fprintf(outrel,"\t\t<h:refRel rdf:parseType=\"Resource\">");
+       fprintf(outrel,"\n\t\t\t\t<h:position rdf:resource=\"&hns;MainConclusion\"/>");
+       fprintf(outrel,"\n\t\t\t\t<h:depth>%d</h:depth>",depth);
+       fprintf(outrel,"\n\t\t</h:refRel>\n");
+        fprintf(outrel,"\t</h:Object>\n");
+      }
+    else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) ||
+            (!strcmp(uri,"Set")))
+      {
+        fprintf(outsort,"\t<h:Object rdf:about=\"");
+        fprintf(outsort,"%s",about);
+        fprintf(outsort,"\">\n");
+       fprintf(outsort,"\t\t<h:refSort rdf:parseType=\"Resource\">");
+       fprintf(outsort,"\n\t\t\t\t<h:position rdf:resource=\"&hns;MainConclusion\"/>");
+        fprintf(outsort,"\n\t\t\t\t<h:sort rdf:resource=\"&hns;%s\"/>",uri);
+       fprintf(outsort,"\n\t\t\t\t<h:depth>%d</h:depth>",depth);
+       fprintf(outsort,"\n\t\t</h:refSort>\n");
+        fprintf(outsort,"\t</h:Object>\n");
+      }
+    else
+      {
+       fprintf(out,"\t\t<h:refObj rdf:parseType=\"Resource\">");
+       fprintf(out,"\n\t\t\t\t<h:position rdf:resource=\"&hns;MainConclusion\"/>");
+       fprintf(out,"\n\t\t\t\t<h:depth>%d</h:depth>",depth);
+       fprintf(out,"\n\t\t\t\t<h:occurrence><h:Object rdf:about=\"%s\"/></h:occurrence>",uri);
+       fprintf(out,"\n\t\t</h:refObj>\n");
+      }
+}
+
+// dome: cambiata per usare il modello con position
+print_one(out,uri,pos)
+     FILE    *out;
+     char    *uri;
+     int     pos;
+{
+    fprintf(out,"\t\t<h:refObj df:parseType=\"Resource\">");
+    fprintf(out,"\n\t\t\t\t<h:position rdf:resource=\"&hns;");
+    if (pos == INBODY)
+       fprintf(out,"InBody");
+    else if (pos == MAINHYP)
+       fprintf(out,"MainHypothesis");
+    else if (pos == INHYP)
+       fprintf(out,"InHypothesis");
+    else if (pos == INCONCL)
+       fprintf(out,"InConclusion");
+    else if (pos == MAINCONCL)
+       fprintf(out,"MainConclusion");
+    fprintf(out,"\"/>\n\t\t\t\t<h:occurrence><h:Object rdf:about=\"%s\"/></h:occurrence>\n\t\t</h:refObj>\n", uri);
+    
+}
+
+ /* The following function allocates a bucket for an identifier. */
+allocate_bucket(st, id, where)
+       struct st_bucket
+                        **st;
+                                       /* pointer to the bucket to be */
+                                       /* allocated */
+       char            *id;
+                                       /* identifier */
+        int             where;
+{
+        int i;
+
+       *st = (struct st_bucket *)malloc(sizeof(struct st_bucket));
+       (*st)->id = (char *)malloc(sizeof('a')*(strlen(id) + 1));
+        strcpy((*st)->id,id);
+        (*st)->main_depth = 0;
+        (*st)->depths = NULL;
+       (*st)->next_st_bucket = NULL;
+        (*st)->all_next = all;
+        all = *st;
+        for (i = 0; i < 5; ++i)
+         (*st)->pos[i] = 0;
+        (*st)->pos[where] = 1;
+}
+
+ /* The following function moves a bucket to the head of the */
+ /* list in which it lies. */
+move_bucket(st, dict_index)
+       struct st_bucket 
+                        *st;
+                                       /* pointer to the bucket to */
+                                       /* be moved */
+       int             dict_index;
+                                       /* index corresponding to */
+                                       /* the list in which the */
+                                       /* bucket lies */
+{
+       st->next_st_bucket = dictionary[dict_index];
+       dictionary[dict_index] = st;
+}
+
+ /* The following function implements Weinberger's hash function. */
+int
+hash_pjw(id)
+       char            *id;
+                                       /* identifier to be hashed */
+{
+       unsigned        h,
+                       g;
+
+       for (h = 0; *id != EOS; id++)
+       {
+               h = (h << HASH1) + (*id);
+               if (g = h & HASH2)
+                       h = h ^ (g >> HASH3) ^ g;
+       }
+       return(h % DICTSIZE);
+}
+
+
+
+
+
+
diff --git a/helm/metadata/extractor/sthandler_ind.h b/helm/metadata/extractor/sthandler_ind.h
new file mode 100644 (file)
index 0000000..2536b0f
--- /dev/null
@@ -0,0 +1,8 @@
+/****************************************************************/
+/*                          STHANDLER.H                        */
+/****************************************************************/
+
+            
+extern  void            init_symbol_table();
+extern  void            print_all(char *, FILE *, FILE *, FILE *);
+extern  int             search_bucket(char *, int, int);