From 407c75f92f1a641dc49243209cd246f6affd924d Mon Sep 17 00:00:00 2001 From: Claudio Sacerdoti Coen Date: Mon, 23 Feb 2004 13:12:40 +0000 Subject: [PATCH] New framework for metadata generation. NOTE: from this release on the metadata are inserted into a MySQL DB. --- helm/metadata/Makefile | 6 + helm/metadata/dc/Makefile | 32 ++ helm/metadata/dc/fill_db.pl | 53 +++ helm/metadata/dc/sql/create_dc_tables.sql | 58 +++ helm/metadata/dc/sql/drop_dc_tables.sql | 18 + helm/metadata/extractor/.cvsignore | 4 + helm/metadata/extractor/Makefile | 28 ++ helm/metadata/extractor/README | 9 + helm/metadata/extractor/meta_lex.l | 363 +++++++++++++++++ helm/metadata/extractor/meta_lex_ind.l | 357 +++++++++++++++++ helm/metadata/extractor/sthandler.c | 453 ++++++++++++++++++++++ helm/metadata/extractor/sthandler.h | 8 + helm/metadata/extractor/sthandler_ind.c | 424 ++++++++++++++++++++ helm/metadata/extractor/sthandler_ind.h | 8 + 14 files changed, 1821 insertions(+) create mode 100644 helm/metadata/Makefile create mode 100644 helm/metadata/dc/Makefile create mode 100755 helm/metadata/dc/fill_db.pl create mode 100644 helm/metadata/dc/sql/create_dc_tables.sql create mode 100644 helm/metadata/dc/sql/drop_dc_tables.sql create mode 100644 helm/metadata/extractor/.cvsignore create mode 100644 helm/metadata/extractor/Makefile create mode 100644 helm/metadata/extractor/README create mode 100644 helm/metadata/extractor/meta_lex.l create mode 100644 helm/metadata/extractor/meta_lex_ind.l create mode 100644 helm/metadata/extractor/sthandler.c create mode 100644 helm/metadata/extractor/sthandler.h create mode 100644 helm/metadata/extractor/sthandler_ind.c create mode 100644 helm/metadata/extractor/sthandler_ind.h diff --git a/helm/metadata/Makefile b/helm/metadata/Makefile new file mode 100644 index 000000000..4708eb22d --- /dev/null +++ b/helm/metadata/Makefile @@ -0,0 +1,6 @@ +all: + rm -f fill_db.sql + time for i in `cat tipi_induttivi.txt` ; do (cd tmp ; wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i") ; zcat tmp/downloaded.xml.gz > tmp/inductive_type.xml ; extractor/meta_ind $$i "tmp/inductive_type.xml" >> fill_db.sql ; rm -f tmp/downloaded.xml.gz tmp/inductive_type.xml; done > log 2>&1 + time for i in `cat costanti_e_variabili.txt` ; do (cd tmp ; wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i" ; wget -t 1 -O downloaded_body.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i.body"); zcat tmp/downloaded.xml.gz > tmp/type.xml ; zcat tmp/downloaded_body.xml.gz > tmp/body.xml ; extractor/meta $$i "tmp/body.xml" "tmp/type.xml" >> fill_db.sql ; rm -f tmp/downloaded.xml.gz tmp/downloaded_body.xml.gz tmp/type.xml tmp/body.xml ; done > log 2>&1 + +.PHONY: all diff --git a/helm/metadata/dc/Makefile b/helm/metadata/dc/Makefile new file mode 100644 index 000000000..c9a7c73d6 --- /dev/null +++ b/helm/metadata/dc/Makefile @@ -0,0 +1,32 @@ + +SQL_DB = mowgli +SQL_DIR = sql +SQL_HOST = localhost +SQL_PASSWORD = bjIcRpru +SQL_USER = helmadmin +DC_DIR = /projects/helm/library/dc/ + +SQL_ENGINE = \ + mysql -h $(SQL_HOST) -u $(SQL_USER) --password=$(SQL_PASSWORD) $(SQL_DB) + +all: + @echo "try one of:" + @echo " make connect" + @echo " make create_tables" + @echo " make fill_db" + @echo " make drop_tables" + +connect: + $(SQL_ENGINE) + +create_tables: + $(SQL_ENGINE) < $(SQL_DIR)/create_dc_tables.sql + +fill_db: + find $(DC_DIR) -name "*.xml" -exec ./fill_db.pl {} \; | $(SQL_ENGINE) + +drop_tables: + $(SQL_ENGINE) < $(SQL_DIR)/drop_dc_tables.sql + +.PHONY: all connect create_tables fill_db drop_tables + diff --git a/helm/metadata/dc/fill_db.pl b/helm/metadata/dc/fill_db.pl new file mode 100755 index 000000000..fed21ee5a --- /dev/null +++ b/helm/metadata/dc/fill_db.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl -w +use strict; + +use XML::Parser; + +my $skipped = 0; +my $open = 0; +my $content = ""; +my $uri = ""; + +sub handle_start($$@) { + my ($expat, $element, @attrs) = @_; + if ($skipped == 1) { + $uri = $attrs[1]; + $uri =~ s/'/''/g; + } + if ($skipped < 2) { $skipped++ ; } + else { + $open++; + $content = ""; + } +} + +sub handle_end($$) { + my ($expat,$element) = @_; + $open--; + if ($open >= 0) { + $content =~ s/'/''/g; + $element =~ s/(\w+):(\w+)/$1$2/; + my $query = "INSERT INTO $element VALUES ('$content','$uri');"; + print $query, "\n"; + } +} + +sub handle_char($$) { + my ($expat,$char) = @_; + if ($open >= 1) { + chomp($char); + $char =~ s/ +/ /g; + $char =~ s/^ //g; + $char =~ s/ $//g; + $content .= $char; + } +} + +my $p = new XML::Parser( + Handlers => + { Start => \&handle_start, + End => \&handle_end, + Char => \&handle_char}); + +$p->parsefile($ARGV[0]); + diff --git a/helm/metadata/dc/sql/create_dc_tables.sql b/helm/metadata/dc/sql/create_dc_tables.sql new file mode 100644 index 000000000..ca53c5b36 --- /dev/null +++ b/helm/metadata/dc/sql/create_dc_tables.sql @@ -0,0 +1,58 @@ + +create table dccreator (value varchar(255), uri varchar(255)); +create table dcdate (value varchar(255), uri varchar(255)); +create table dcdescription (value varchar(255), uri varchar(255)); +create table dcformat (value varchar(255), uri varchar(255)); +create table dcidentifier (value varchar(255), uri varchar(255)); +create table dclanguage (value varchar(255), uri varchar(255)); +create table dcpublisher (value varchar(255), uri varchar(255)); +create table dcqRelationType (value varchar(255), uri varchar(255)); +create table dcrelation (value varchar(255), uri varchar(255)); +create table dcrights (value varchar(255), uri varchar(255)); +create table dcsource (value varchar(255), uri varchar(255)); +create table dcsubject (value varchar(255), uri varchar(255)); +create table dctitle (value varchar(255), uri varchar(255)); +create table hthResourceFormat (value varchar(255), uri varchar(255)); +create table hthcontact (value varchar(255), uri varchar(255)); +create table hthfirstVersion (value varchar(255), uri varchar(255)); +create table hthinstitution (value varchar(255), uri varchar(255)); +create table hthmodified (value varchar(255), uri varchar(255)); + +create index dccreator_index on dccreator (value); +create index dcdate_index on dcdate (value); +create index dcdescription_index on dcdescription (value); +create index dcformat_index on dcformat (value); +create index dcidentifier_index on dcidentifier (value); +create index dclanguage_index on dclanguage (value); +create index dcpublisher_index on dcpublisher (value); +create index dcqRelationType_index on dcqRelationType (value); +create index dcrelation_index on dcrelation (value); +create index dcrights_index on dcrights (value); +create index dcsource_index on dcsource (value); +create index dcsubject_index on dcsubject (value); +create index dctitle_index on dctitle (value); +create index hthResourceFormat_index on hthResourceFormat (value); +create index hthcontact_index on hthcontact (value); +create index hthfirstVersion_index on hthfirstVersion (value); +create index hthinstitution_index on hthinstitution (value); +create index hthmodified_index on hthmodified (value); + +create index dccreator_rev_index on dccreator (uri); +create index dcdate_rev_index on dcdate (uri); +create index dcdescription_rev_index on dcdescription (uri); +create index dcformat_rev_index on dcformat (uri); +create index dcidentifier_rev_index on dcidentifier (uri); +create index dclanguage_rev_index on dclanguage (uri); +create index dcpublisher_rev_index on dcpublisher (uri); +create index dcqRelationType_rev_index on dcqRelationType (uri); +create index dcrelation_rev_index on dcrelation (uri); +create index dcrights_rev_index on dcrights (uri); +create index dcsource_rev_index on dcsource (uri); +create index dcsubject_rev_index on dcsubject (uri); +create index dctitle_rev_index on dctitle (uri); +create index hthResourceFormat_rev_index on hthResourceFormat (uri); +create index hthcontact_rev_index on hthcontact (uri); +create index hthfirstVersion_rev_index on hthfirstVersion (uri); +create index hthinstitution_rev_index on hthinstitution (uri); +create index hthmodified_rev_index on hthmodified (uri); + diff --git a/helm/metadata/dc/sql/drop_dc_tables.sql b/helm/metadata/dc/sql/drop_dc_tables.sql new file mode 100644 index 000000000..d13a2a8ed --- /dev/null +++ b/helm/metadata/dc/sql/drop_dc_tables.sql @@ -0,0 +1,18 @@ +drop table dccreator; +drop table dcdate; +drop table dcdescription; +drop table dcformat; +drop table dcidentifier; +drop table dclanguage; +drop table dcpublisher; +drop table dcqRelationType; +drop table dcrelation; +drop table dcrights; +drop table dcsource; +drop table dcsubject; +drop table dctitle; +drop table hthResourceFormat; +drop table hthcontact; +drop table hthfirstVersion; +drop table hthinstitution; +drop table hthmodified; diff --git a/helm/metadata/extractor/.cvsignore b/helm/metadata/extractor/.cvsignore new file mode 100644 index 000000000..1d5ca6be6 --- /dev/null +++ b/helm/metadata/extractor/.cvsignore @@ -0,0 +1,4 @@ +meta +meta_ind +lex.yy.c +lex.yy_ind.c diff --git a/helm/metadata/extractor/Makefile b/helm/metadata/extractor/Makefile new file mode 100644 index 000000000..4b6f01aa8 --- /dev/null +++ b/helm/metadata/extractor/Makefile @@ -0,0 +1,28 @@ +CC = gcc -Wall + +all: meta meta_ind + +meta: lex.yy.o sthandler.o + $(CC) lex.yy.o sthandler.o -lpq -o meta + +meta_ind: lex.yy_ind.o sthandler.o + $(CC) lex.yy_ind.o sthandler.o -lpq -o meta_ind + +lex.yy.c: meta_lex.l sthandler.h + flex meta_lex.l + +lex.yy_ind.c: meta_lex_ind.l sthandler.h + flex -olex.yy_ind.c meta_lex_ind.l + +sthandler.o: sthandler.c sthandler.h + +lex.yy.o: lex.yy.c sthandler.h + $(CC) -c lex.yy.c + +lex.yy_ind.o: lex.yy_ind.c sthandler.h + $(CC) -c lex.yy_ind.c + +clean: + -rm -f *.o + -rm -f lex.yy.c lex.yy_ind.c + -rm -f meta meta_ind diff --git a/helm/metadata/extractor/README b/helm/metadata/extractor/README new file mode 100644 index 000000000..6c9fe0bcb --- /dev/null +++ b/helm/metadata/extractor/README @@ -0,0 +1,9 @@ +Note: + - LetIn e Variabili con corpo: da pensarci (capita solamente una 30ina + di volte... per ora!) Per il momento ci mettiamo una pezza. + - Variabili: non consideriamo l'occorrenza di una variabile come una + vera occorrenza (perche' puo' essere istanziata). In ogni caso c'e' + l'attributo @params che fornisce questa informazione. + - META e IMPLICIT non trattati + - CAST non considerati ==> di default vado in ricorsione sia sul tipo + che sul corpo diff --git a/helm/metadata/extractor/meta_lex.l b/helm/metadata/extractor/meta_lex.l new file mode 100644 index 000000000..35bfcc9ba --- /dev/null +++ b/helm/metadata/extractor/meta_lex.l @@ -0,0 +1,363 @@ + /******************************************************************/ + /* Copyright (C) 2000, HELM Team */ + /* */ + /* This file is part of HELM, an Hypertextual, Electronic */ + /* Library of Mathematics, developed at the Computer Science */ + /* Department, University of Bologna, Italy. */ + /* */ + /* HELM is free software; you can redistribute it and/or */ + /* modify it under the terms of the GNU General Public License */ + /* as published by the Free Software Foundation; either version */ + /* 2 of the License, or (at your option) any later version. */ + /* */ + /* HELM is distributed in the hope that it will be useful, */ + /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ + /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ + /* GNU General Public License for more details. */ + /* */ + /* You should have received a copy of the GNU General Public */ + /* License along with HELM; if not, write to the Free Software */ + /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, */ + /* MA 02111-1307, USA. */ + /* */ + /* For details, see the HELM World-Wide-Web page, */ + /* http://cs.unibo.it/helm/. */ + /******************************************************************/ + + /***************************************************************/ + /* META_LEXAN */ + /* Automatic Metadata Extractor */ + /* First draft 11/12/2001, by Andrea Asperti */ + /* more bugs added by domenico lordi on mon 12/17/2001 */ + /***************************************************************/ + + /***************************************************************/ + /* 1. Inclusion of header files. */ + /***************************************************************/ + +%{ +#include +#include +#include +#include "sthandler.h" +%} + + /***************************************************************/ + /* 2. Constants and Variables Definitions */ + /***************************************************************/ + +%{ +#define NOWHERE 0 +#define CONST 1 +#define MUTIND 2 +#define MUTCONSTRUCT 3 +#define SORT 4 + +#define INBODY 0 +#define MAINHYP 1 +#define INHYP 2 +#define INCONCL 3 +#define MAINCONCL 4 +#define INTYPE 5 +#define NOTFOUND 6 + +#define HERE 0 +#define AFTER 1 + + +int where = NOWHERE; +int found = NOTFOUND; +int position = INBODY; +int first_child = HERE; +int skip = 0; // boolean to skip the insertion of a URI +int no_open_source = 0; +int spine_depth = 0; +int depth = 0; +int tmp_n; +char sep = '"'; +char *xpointer = "#xpointer(1/"; +char *uri; +char *tmp; + +void search(char *uri, int first_child, int position, int depth); +%} + + /***************************************************************/ + /* 3. Regular definitions. */ + /***************************************************************/ + +uri [^"]+ +digits [0-9]+ +value [^"]+ + + /***************************************************************/ + /* 4. Rules. */ + /***************************************************************/ + + +%% + +"]*">"(" "|\n)*""(" "|\n)*"" | +"" { + if (position == INHYP) + { + no_open_source--; + if (no_open_source == 0) + { + position = MAINHYP; + depth++; + first_child = HERE; + } + } + else if (position == MAINHYP) + { + position = INTYPE; + spine_depth++; + depth = 0; + first_child = HERE; + } + /* bug? first_child = HERE; */ + } + + +.|\n { + } + +" \n"); + exit(1); + } + + + /* initialize the symbol table */ + init_symbol_table(); + + // We process the body + if (!stat(argv[2],&buf)) + { + yyin = fopen(argv[2], "r"); + position = INBODY; + yylex(); + fclose(yyin); + } + + // We process the type + yyin = fopen(argv[3], "r"); + position = INTYPE; + first_child = HERE; + no_open_source = 0; + spine_depth = 0; + depth = 0; + yylex(); + fclose(yyin); + print_all(argv[1]); + + return 0; +} + + +void search(uri,first_child,position,depth) +char *uri; +int first_child; +int position; +{ + if (position == MAINHYP) + { + if (first_child == HERE) + found = search_bucket(uri,MAINHYP,depth); + else + found = search_bucket(uri,INHYP,0); + } + else if (position == INCONCL) + { + if (first_child == HERE) + found = search_bucket(uri,MAINCONCL,depth); + else + found = search_bucket(uri,INCONCL,0); + } + + else + found = search_bucket(uri,position,depth); + /* + if (found == NOTFOUND) + fprintf(stderr,"here = %d, pos = %d, uri = %s\n", first_child,position, uri); */ +} +/* + (first_child == HERE) + { + if (position == MAINHYP) + found = search_bucket(uri,MAINHYP,depth); + else if (position == INCONCL) + found = search_bucket(uri,MAINCONCL,0); + else if (position == INHYP) + found = search_bucket(uri,INHYP,0); + if (found == NOTFOUND) + printf( "pos = %d, uri = %s\n", MAINCONCL, uri); + } + else if ((position == MAINHYP) && (first_child == AFTER)) + found = search_bucket(uri,INHYP,0); + else found = search_bucket(uri,position,0); + if (found == NOTFOUND) + printf( "pos = %d, uri = %s\n", position, uri); + } */ + +int yywrap() { + return 1; + } + + + + diff --git a/helm/metadata/extractor/meta_lex_ind.l b/helm/metadata/extractor/meta_lex_ind.l new file mode 100644 index 000000000..2f844f9b6 --- /dev/null +++ b/helm/metadata/extractor/meta_lex_ind.l @@ -0,0 +1,357 @@ + /******************************************************************/ + /* Copyright (C) 2000, HELM Team */ + /* */ + /* This file is part of HELM, an Hypertextual, Electronic */ + /* Library of Mathematics, developed at the Computer Science */ + /* Department, University of Bologna, Italy. */ + /* */ + /* HELM is free software; you can redistribute it and/or */ + /* modify it under the terms of the GNU General Public License */ + /* as published by the Free Software Foundation; either version */ + /* 2 of the License, or (at your option) any later version. */ + /* */ + /* HELM is distributed in the hope that it will be useful, */ + /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ + /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ + /* GNU General Public License for more details. */ + /* */ + /* You should have received a copy of the GNU General Public */ + /* License along with HELM; if not, write to the Free Software */ + /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, */ + /* MA 02111-1307, USA. */ + /* */ + /* For details, see the HELM World-Wide-Web page, */ + /* http://cs.unibo.it/helm/. */ + /******************************************************************/ + + /***************************************************************/ + /* META_LEXAN */ + /* Automatic Metadata Extractor */ + /* First draft 11/12/2001, by Andrea Asperti */ + /* more bugs added by domenico lordi on mon 12/17/2001 */ + /***************************************************************/ + + /***************************************************************/ + /* 1. Inclusion of header files. */ + /***************************************************************/ + +%{ +#include +#include +#include +#include "sthandler.h" +%} + + /***************************************************************/ + /* 2. Constants and Variables Definitions */ + /***************************************************************/ + +%{ +#define NOWHERE 0 +#define CONST 1 +#define MUTIND 2 +#define MUTCONSTRUCT 3 +#define SORT 4 + +#define INBODY 0 +#define MAINHYP 1 +#define INHYP 2 +#define INCONCL 3 +#define MAINCONCL 4 +#define INTYPE 5 +#define NOTFOUND 6 + +#define HERE 0 +#define AFTER 1 + + +int where = NOWHERE; +int found = NOTFOUND; +int position = INBODY; +int first_child = HERE; +int skip = 0; // boolean to skip the insertion of a URI +int no_open_source =0; +int spine_depth = 0; +int depth = 0; +int tmp_n; +int inductive_type = 0; +int constructor = 0; +int deep_type = 0; +char sep = '"'; +char *xpointer = "#xpointer(1/"; +char *uri; +char *tmp; +char *source_uri; +char *source_uri_prefix; + +void search(char *uri, int first_child, int position, int depth); +%} + + /***************************************************************/ + /* 3. Regular definitions. */ + /***************************************************************/ + +uri [^"]+ +digits [0-9]+ +value [^"]+ + + /***************************************************************/ + /* 4. Rules. */ + /***************************************************************/ + + +%% + +"" { tmp = (char *)malloc(sizeof('a')*128); + strcpy(source_uri,source_uri_prefix); + sprintf(tmp,"#xpointer(1/%d)", inductive_type); + strcat(source_uri,tmp); + /* fprintf(stderr,"cinque"); */ + free(tmp); + print_all(source_uri); + /* print_file(); */ + } + +"" { tmp = (char *)malloc(sizeof('a')*128); + strcpy(source_uri,source_uri_prefix); + sprintf(tmp,"#xpointer(1/%d/%d)",inductive_type,constructor); + strcat(source_uri,tmp); + free(tmp); + print_all(source_uri); + /* print_file(); */ + } + +"" | +"" { + if (position == INHYP) + { + no_open_source--; + if (no_open_source == 0) + { + position = MAINHYP; + depth++; + first_child = HERE; + } + } + else if (position == MAINHYP) + { + position = INTYPE; + spine_depth++; + depth = 0; + first_child = HERE; + } + } + + +.|\n { + } + +" \n"); + exit(1); + } + + source_uri = malloc((sizeof('a')*2000)); + source_uri_prefix=argv[1]; + /* fprintf(stderr,"qua"); */ + yyin = fopen(argv[2], "r"); + yylex(); + + return 0; +} + +void search(uri,first_child,position,depth) +char *uri; +int first_child; +int position; +{ + if (position == MAINHYP) + { + if (first_child == HERE) + found = search_bucket(uri,MAINHYP,depth); + else + found = search_bucket(uri,INHYP,0); + } + else if (position == INCONCL) + { + if (first_child == HERE) + found = search_bucket(uri,MAINCONCL,depth); + else + found = search_bucket(uri,INCONCL,0); + } + + else + found = search_bucket(uri,position,depth); + /* if (found == NOTFOUND) + printf( "pos = %d, uri = %s\n", position, uri); */ +} + + +int yywrap() { + return 1; + } + + + + diff --git a/helm/metadata/extractor/sthandler.c b/helm/metadata/extractor/sthandler.c new file mode 100644 index 000000000..6f87cdfb5 --- /dev/null +++ b/helm/metadata/extractor/sthandler.c @@ -0,0 +1,453 @@ +/*********************************************************************/ +/* Copyright (C) 2000, HELM Team */ +/* */ +/* This file is part of HELM, an Hypertextual, Electronic */ +/* Library of Mathematics, developed at the Computer Science */ +/* Department, University of Bologna, Italy. */ +/* */ +/* HELM is free software; you can redistribute it and/or */ +/* modify it under the terms of the GNU General Public License */ +/* as published by the Free Software Foundation; either version 2 */ +/* of the License, or (at your option) any later version. */ +/* */ +/* HELM is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with HELM; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, */ +/* MA 02111-1307, USA. */ +/* */ +/* For details, see the HELM World-Wide-Web page, */ +/* http://cs.unibo.it/helm/. */ + /*********************************************************************/ + +/****************************************************************/ +/* STHANDLER.C */ +/****************************************************************/ +/* This module supplies routines for symbol table handling. */ +/* - init_symbol_table(): it initializes the symbol table */ +/* to void. */ +/* - search_bucket(): it searches the symbol table for the */ +/* bucket containing a given identifier, and */ +/* inserts it if it is not present; */ +/****************************************************************/ +/* First draft 11/12/2001, by Andrea Asperti */ +/****************************************************************/ + +/****************************************************************/ +/* 1. Inclusion of header files. */ +/****************************************************************/ + +#include +#include +#include + +/****************************************************************/ +/* 2. Declarations */ +/****************************************************************/ + + +#define DICTSIZE 211 +#define HASH1 4 +#define HASH2 0xf0000000 +#define HASH3 28 +#define EOS '\0' + +#define INBODY 0 +#define MAINHYP 1 +#define INHYP 2 +#define INCONCL 3 +#define MAINCONCL 4 +#define INTYPE 5 +#define NOTFOUND 6 + +/****************************************************************/ +/* 3. Types. */ +/****************************************************************/ + +struct int_list { + int val; + struct int_list *next; + }; + +struct st_bucket { + char *id; + /* identifier */ + int main_depth; + struct int_list *depths; + struct st_bucket *next_st_bucket; + /* next bucket in the list */ + struct st_bucket *all_next; + /* all buckets in symbol + table are linked together */ + int pos[5]; + + }; + +struct st_bucket *dictionary[DICTSIZE]; + /* pointers to bucket lists */ + +/****************************************************************/ +/* 4. Local functions. */ +/****************************************************************/ +struct int_list *add(struct int_list *,int); +void allocate_bucket(struct st_bucket **st, char *id, int where); +void print_mainhyp(char *about, char *uri,struct int_list *l); +void print_mainconcl(char *about, char *uri, int depth); +void move_bucket(struct st_bucket *st, int dict_index); +void print_one(char *about, char *uri, int pos); +int hash_pjw(char *id); + +/* This function is copied from the file fe-exec.c of PostgreSQL. */ +/* Copyright (c) 1996-2003, PostgreSQL Global Development Group */ +/* Copyright (c) 1994, Regents of the University of California */ +size_t +PQescapeString(char *to, const char *from, size_t length) +{ + const char *source = from; + char *target = to; + size_t remaining = length; + + while (remaining > 0 && *source != '\0') + { + switch (*source) + { + case '\\': + *target++ = '\\'; + *target++ = '\\'; + break; + + case '\'': + *target++ = '\''; + *target++ = '\''; + break; + + default: + *target++ = *source; + break; + } + source++; + remaining--; + } + + /* Write the terminating NUL character. */ + *target = '\0'; + + return target - to; +} + + +/****************************************************************/ +/* 5. Definitions of functions to be exported. */ +/****************************************************************/ + +struct st_bucket *all; + + /* The following function initializes the symbol table to NULL */ +void init_symbol_table() +{ + int i; + + /* initialize the dictionary */ + for (i = 0; i < DICTSIZE; i++) + dictionary[i] = NULL; + all = NULL; +} + + /* The following function searches the symbol table for an identifier */ + /* and inserts it if it is not present. */ + /* The bucket associated with the given identifier */ + /* becomes the first one in its list. */ + +int search_bucket(id, where, depth) + char *id; + /* identifier */ + int where; + int depth; +{ + int dict_index; + /* value returned by the */ + /* hash function */ + struct st_bucket + *prev, + *curr; + + struct st_bucket *st; + + /* apply the hash function */ + dict_index = hash_pjw(id); + /* fprintf(stderr,"%d\n", dict_index); */ + + /* scan the bucket list indicated by the hash function */ + prev = curr = dictionary[dict_index]; + while ((curr != NULL) && (strcmp(id, curr->id))) + { + prev = curr; + curr = curr->next_st_bucket; + } + if (curr == NULL) + /* the identifier is not in the list */ + { + allocate_bucket(&st,id,where); + if (where == MAINCONCL) + st->main_depth = depth; + else if (where == MAINHYP) + st->depths = add(st->depths,depth); + move_bucket(st,dict_index); + return NOTFOUND; + } + else + /* + printf("uno=%s\n", id); + printf("st=%s\n", curr->id); fflush(stdout) */ + + /* the identifier is already in the list */ + { + /* st = curr; */ + curr->pos[where] = 1; + if (where >= 1) + curr->pos[INBODY] = 0; /* it will never be set again to 1 */ + if (where == MAINHYP) + curr->depths=add(curr->depths,depth); + else if (where == MAINCONCL) + curr->main_depth = depth; + if (prev != curr) + /* the identifier is not in the first position */ + { + prev->next_st_bucket = curr->next_st_bucket; + move_bucket(curr,dict_index); + }; + return where; + } +} + +void print_all(about,conn) + char *about; +{ + + int i; + struct st_bucket *curr; + curr = all; + while (curr != NULL) + { + for (i = 0; i < 5; ++i) + if ((curr->pos[i]) == 1) + { + if (i == MAINHYP) + print_mainhyp(about,curr->id,curr->depths); + else if (i == MAINCONCL) + print_mainconcl(about,curr->id,curr->main_depth); + else + print_one(about,curr->id,i); + } + curr = curr->all_next; + } +} + + +/****************************************************************/ +/* 5. Definitions of functions local to the module. */ +/****************************************************************/ + +struct int_list *add(l,m) + struct int_list *l; + int m; +{ + struct int_list *curr; + /* scan the list looking for m */ + curr = l; + while ((curr != NULL) && (m != (curr->val))) + curr = curr->next; + if (curr == NULL) + /* m is not in the list */ + { + curr = (struct int_list *)malloc(sizeof(struct int_list)); + curr->val = m; + curr->next = l; + return curr; + } + else + return l; + +} + +void print_mainhyp(about,uri,l) + char *about; + char *uri; + struct int_list *l; +{ + struct int_list *curr; + curr = l; + if (!strcmp(uri,"Rel")) + { + /* scan the list */ + while (curr != NULL) + { + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + printf("INSERT INTO refRel VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d);\n",qabout,curr->val); + curr = curr->next; + } + } + else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) || + (!strcmp(uri,"Set"))) + { + /* scan the list */ + while (curr != NULL) + { + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + printf("INSERT INTO refSort VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d, '%s');\n",qabout,curr->val,uri); + curr = curr->next; + } + } + else + { + /* scan the list */ + while (curr != NULL) + { + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + char *quri; + PQescapeString(qabout,about,len); + len = strlen(uri) + 1; + quri = malloc (sizeof(char) * len * 2); + PQescapeString(quri,uri,len); + printf("INSERT INTO refObj VALUES ('%s', '%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d);\n",qabout,quri,curr->val); + curr = curr->next; + } + } +} + +void print_mainconcl(about,uri,depth) + char *about; + char *uri; + int depth; + +{ + /* fprintf(stderr,"about = %s\n",about); */ + if (!strcmp(uri,"Rel")) + { + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + printf("INSERT INTO refRel VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d);\n",qabout,depth); + } + else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) || + (!strcmp(uri,"Set"))) + { + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + printf("INSERT INTO refSort VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d, '%s');\n",qabout,depth,uri); + } + else + { + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + char *quri; + PQescapeString(qabout,about,len); + len = strlen(uri) + 1; + quri = malloc (sizeof(char) * len * 2); + PQescapeString(quri,uri,len); + printf("INSERT INTO refObj VALUES ('%s', '%s','http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d);\n",qabout,quri,depth); + } +} + +// dome: cambiata per usare il modello con position +void print_one(about,uri,pos) + char *about, + *uri; + int pos; +{ + char *position = (char *)malloc((sizeof('a')*20)); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + char *quri; + PQescapeString(qabout,about,len); + len = strlen(uri) + 1; + quri = malloc (sizeof(char) * len * 2); + PQescapeString(quri,uri,len); + if (pos == INBODY) + position="InBody"; + else if (pos == MAINHYP) + position="MainHypothesis"; /* This should never happen */ + else if (pos == INHYP) + position="InHypothesis"; + else if (pos == INCONCL) + position="InConclusion"; + else if (pos == MAINCONCL) + position="MainConclusion"; /* This should never happen */ + printf("INSERT INTO refObj VALUES ('%s', '%s', \ + 'http://www.cs.unibo.it/helm/schemas/schema-helm#%s', NULL);\n",qabout,quri,position); +} + + /* The following function allocates a bucket for an identifier. */ +void allocate_bucket(st, id, where) + struct st_bucket + **st; + /* pointer to the bucket to be */ + /* allocated */ + char *id; + /* identifier */ + int where; +{ + int i; + + *st = (struct st_bucket *)malloc(sizeof(struct st_bucket)); + (*st)->id = (char *)malloc(sizeof('a')*(strlen(id) + 1)); + strcpy((*st)->id,id); + (*st)->main_depth = 0; + (*st)->depths = NULL; + (*st)->next_st_bucket = NULL; + (*st)->all_next = all; + all = *st; + for (i = 0; i < 5; ++i) + (*st)->pos[i] = 0; + (*st)->pos[where] = 1; +} + + /* The following function moves a bucket to the head of the */ + /* list in which it lies. */ +void move_bucket(st, dict_index) + struct st_bucket + *st; + /* pointer to the bucket to */ + /* be moved */ + int dict_index; + /* index corresponding to */ + /* the list in which the */ + /* bucket lies */ +{ + st->next_st_bucket = dictionary[dict_index]; + dictionary[dict_index] = st; +} + + /* The following function implements Weinberger's hash function. */ +int +hash_pjw(id) + char *id; + /* identifier to be hashed */ +{ + unsigned h, + g; + + for (h = 0; *id != EOS; id++) + { + h = (h << HASH1) + (*id); + if ((g = h) & HASH2) + h = h ^ (g >> HASH3) ^ g; + } + return(h % DICTSIZE); +} + + + + + + diff --git a/helm/metadata/extractor/sthandler.h b/helm/metadata/extractor/sthandler.h new file mode 100644 index 000000000..b0305d1ec --- /dev/null +++ b/helm/metadata/extractor/sthandler.h @@ -0,0 +1,8 @@ +/****************************************************************/ +/* STHANDLER.H */ +/****************************************************************/ + + +extern void init_symbol_table(); +extern void print_all(char *); +extern int search_bucket(char *, int, int); diff --git a/helm/metadata/extractor/sthandler_ind.c b/helm/metadata/extractor/sthandler_ind.c new file mode 100644 index 000000000..ee035ef2a --- /dev/null +++ b/helm/metadata/extractor/sthandler_ind.c @@ -0,0 +1,424 @@ +/*********************************************************************/ +/* Copyright (C) 2000, HELM Team */ +/* */ +/* This file is part of HELM, an Hypertextual, Electronic */ +/* Library of Mathematics, developed at the Computer Science */ +/* Department, University of Bologna, Italy. */ +/* */ +/* HELM is free software; you can redistribute it and/or */ +/* modify it under the terms of the GNU General Public License */ +/* as published by the Free Software Foundation; either version 2 */ +/* of the License, or (at your option) any later version. */ +/* */ +/* HELM is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with HELM; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, */ +/* MA 02111-1307, USA. */ +/* */ +/* For details, see the HELM World-Wide-Web page, */ +/* http://cs.unibo.it/helm/. */ + /*********************************************************************/ + +/****************************************************************/ +/* STHANDLER.C */ +/****************************************************************/ +/* This module supplies routines for symbol table handling. */ +/* - init_symbol_table(): it initializes the symbol table */ +/* to void. */ +/* - search_bucket(): it searches the symbol table for the */ +/* bucket containing a given identifier, and */ +/* inserts it if it is not present; */ +/****************************************************************/ +/* First draft 11/12/2001, by Andrea Asperti */ +/****************************************************************/ + +/****************************************************************/ +/* 1. Inclusion of header files. */ +/****************************************************************/ + +#include +#include + +/****************************************************************/ +/* 2. Declarations */ +/****************************************************************/ + + +#define DICTSIZE 211 +#define HASH1 4 +#define HASH2 0xf0000000 +#define HASH3 24 +#define EOS '\0' + +#define INBODY 0 +#define MAINHYP 1 +#define INHYP 2 +#define INCONCL 3 +#define MAINCONCL 4 +#define INTYPE 5 +#define NOTFOUND 6 + +/****************************************************************/ +/* 3. Types. */ +/****************************************************************/ + +struct int_list { + int val; + struct int_list *next; + }; + +struct st_bucket { + char *id; + /* identifier */ + int main_depth; + struct int_list *depths; + struct st_bucket *next_st_bucket; + /* next bucket in the list */ + struct st_bucket *all_next; + /* all buckets in symbol + table are linked together */ + int pos[5]; + + }; + +struct st_bucket *dictionary[DICTSIZE]; + /* pointers to bucket lists */ + +/****************************************************************/ +/* 4. Local functions. */ +/****************************************************************/ +struct int_list *add(struct int_list *,int); +int hash_pjw(char *id); + +/****************************************************************/ +/* 5. Definitions of functions to be exported. */ +/****************************************************************/ + +struct st_bucket *all; + + /* The following function initializes the symbol table to NULL */ +void init_symbol_table() +{ + struct st_bucket *st; + int i; + + /* initialize the dictionary */ + for (i = 0; i < DICTSIZE; i++) + dictionary[i] = NULL; + all = NULL; +} + + /* The following function searches the symbol table for an identifier */ + /* and inserts it if it is not present. + /* The bucket associated with the given identifier */ + /* becomes the first one in its list. */ + +search_bucket(id, where, depth) + char *id; + /* identifier */ + int where; + int depth; +{ + int dict_index; + /* value returned by the */ + /* hash function */ + struct st_bucket + *prev, + *curr; + + struct st_bucket *st; + + /* apply the hash function */ + dict_index = hash_pjw(id); + /* fprintf(stderr,"%d\n", dict_index); fflush(stdout); */ + + /* scan the bucket list indicated by the hash function */ + prev = curr = dictionary[dict_index]; + while ((curr != NULL) && (strcmp(id, curr->id))) + { + prev = curr; + curr = curr->next_st_bucket; + } + if (curr == NULL) + /* the identifier is not in the list */ + { + allocate_bucket(&st,id,where); + if (where == MAINCONCL) + st->main_depth = depth; + else if (where == MAINHYP) + st->depths = add(st->depths,depth); + move_bucket(st,dict_index); + return NOTFOUND; + } + else + /* + fprintf(stderr,"uno=%s\n", id); + fprintf(stderr,"st=%s\n", curr->id); fflush(stdout) */ + + /* the identifier is already in the list */ + { + /* st = curr; */ + curr->pos[where] = 1; + if (where >= 1) + curr->pos[INBODY] = 0; /* it will never be set again to 1 */ + if (where == MAINHYP) + curr->depths=add(curr->depths,depth); + else if (where == MAINCONCL) + curr->main_depth = depth; + if (prev != curr) + /* the identifier is not in the first position */ + { + prev->next_st_bucket = curr->next_st_bucket; + move_bucket(curr, + dict_index); + }; + return where; + } +} + +print_all(about,out,outrel,outsort) + char *about; + FILE *out, + *outrel, + *outsort; +{ + + int i; + struct st_bucket *curr; + curr = all; + while (curr != NULL) + { + for (i = 0; i < 5; ++i) + if ((curr->pos[i]) == 1) + { + if (i == MAINHYP) + print_mainhyp(about,out,outrel,outsort,curr->id,curr->depths); + else if (i == MAINCONCL) + print_mainconcl(about,out,outrel,outsort,curr->id,curr->main_depth); + else + print_one(out,curr->id,i); + } + curr = curr->all_next; + } +} + + +/****************************************************************/ +/* 5. Definitions of functions local to the module. */ +/****************************************************************/ + +struct int_list *add(l,m) + struct int_list *l; + int m; +{ + struct int_list *curr; + /* scan the list looking for m */ + curr = l; + while ((curr != NULL) && (m != (curr->val))) + curr = curr->next; + if (curr == NULL) + /* m is not in the list */ + { + curr = (struct int_list *)malloc(sizeof(struct int_list)); + curr->val = m; + curr->next = l; + return curr; + } + else + return l; + +} + +print_mainhyp(about,out,outrel,outsort,uri,l) + char *about; + FILE *out, + *outrel, + *outsort; + char *uri; + struct int_list *l; +{ + struct int_list *curr; + curr = l; + if (!strcmp(uri,"Rel")) + { + /* scan the list */ + while (curr != NULL) + { + fprintf(outrel,"\t\n"); + fprintf(outrel,"\t\t"); + fprintf(outrel,"\n\t\t\t\t"); + fprintf(outrel,"\n\t\t\t\t%d",curr->val); + fprintf(outrel,"\n\t\t\n"); + fprintf(outrel,"\t\n"); + curr = curr->next; + } + } + else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) || + (!strcmp(uri,"Set"))) + { + /* scan the list */ + while (curr != NULL) + { + fprintf(outsort,"\t\n"); + fprintf(outsort,"\t\t"); + fprintf(outsort,"\n\t\t\t\t"); + fprintf(outsort,"\n\t\t\t\t",uri); + fprintf(outsort,"\n\t\t\t\t%d",curr->val); + fprintf(outsort,"\n\t\t\n"); + fprintf(outsort,"\t\n"); + curr = curr->next; + } + } + else + { + /* scan the list */ + while (curr != NULL) + { + fprintf(out,"\t\t"); + fprintf(out,"\n\t\t\t\t"); + fprintf(out,"\n\t\t\t\t%d",curr->val); + fprintf(out,"\n\t\t\t\t",uri); + fprintf(out,"\n\t\t\n"); + curr = curr->next; + } + } +} + +print_mainconcl(about,out,outrel,outsort,uri,depth) + char *about; + FILE *out, + *outrel, + *outsort; + char *uri; + int depth; + +{ + if (!strcmp(uri,"Rel")) + { + fprintf(outrel,"\t\n"); + fprintf(outrel,"\t\t"); + fprintf(outrel,"\n\t\t\t\t"); + fprintf(outrel,"\n\t\t\t\t%d",depth); + fprintf(outrel,"\n\t\t\n"); + fprintf(outrel,"\t\n"); + } + else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) || + (!strcmp(uri,"Set"))) + { + fprintf(outsort,"\t\n"); + fprintf(outsort,"\t\t"); + fprintf(outsort,"\n\t\t\t\t"); + fprintf(outsort,"\n\t\t\t\t",uri); + fprintf(outsort,"\n\t\t\t\t%d",depth); + fprintf(outsort,"\n\t\t\n"); + fprintf(outsort,"\t\n"); + } + else + { + fprintf(out,"\t\t"); + fprintf(out,"\n\t\t\t\t"); + fprintf(out,"\n\t\t\t\t%d",depth); + fprintf(out,"\n\t\t\t\t",uri); + fprintf(out,"\n\t\t\n"); + } +} + +// dome: cambiata per usare il modello con position +print_one(out,uri,pos) + FILE *out; + char *uri; + int pos; +{ + fprintf(out,"\t\t"); + fprintf(out,"\n\t\t\t\t\n\t\t\t\t\n\t\t\n", uri); + +} + + /* The following function allocates a bucket for an identifier. */ +allocate_bucket(st, id, where) + struct st_bucket + **st; + /* pointer to the bucket to be */ + /* allocated */ + char *id; + /* identifier */ + int where; +{ + int i; + + *st = (struct st_bucket *)malloc(sizeof(struct st_bucket)); + (*st)->id = (char *)malloc(sizeof('a')*(strlen(id) + 1)); + strcpy((*st)->id,id); + (*st)->main_depth = 0; + (*st)->depths = NULL; + (*st)->next_st_bucket = NULL; + (*st)->all_next = all; + all = *st; + for (i = 0; i < 5; ++i) + (*st)->pos[i] = 0; + (*st)->pos[where] = 1; +} + + /* The following function moves a bucket to the head of the */ + /* list in which it lies. */ +move_bucket(st, dict_index) + struct st_bucket + *st; + /* pointer to the bucket to */ + /* be moved */ + int dict_index; + /* index corresponding to */ + /* the list in which the */ + /* bucket lies */ +{ + st->next_st_bucket = dictionary[dict_index]; + dictionary[dict_index] = st; +} + + /* The following function implements Weinberger's hash function. */ +int +hash_pjw(id) + char *id; + /* identifier to be hashed */ +{ + unsigned h, + g; + + for (h = 0; *id != EOS; id++) + { + h = (h << HASH1) + (*id); + if (g = h & HASH2) + h = h ^ (g >> HASH3) ^ g; + } + return(h % DICTSIZE); +} + + + + + + diff --git a/helm/metadata/extractor/sthandler_ind.h b/helm/metadata/extractor/sthandler_ind.h new file mode 100644 index 000000000..2536b0f60 --- /dev/null +++ b/helm/metadata/extractor/sthandler_ind.h @@ -0,0 +1,8 @@ +/****************************************************************/ +/* STHANDLER.H */ +/****************************************************************/ + + +extern void init_symbol_table(); +extern void print_all(char *, FILE *, FILE *, FILE *); +extern int search_bucket(char *, int, int); -- 2.39.2