From: Andrea Asperti Date: Wed, 1 Oct 2003 08:39:17 +0000 (+0000) Subject: * new version of metadata extraction X-Git-Tag: V_0_4_3_4~6 X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=commitdiff_plain;h=cdb45d73b98f6abaf670229b48a8442e1db902fc;p=helm.git * new version of metadata extraction * metadata are inserted directly without creating the RDF document --- diff --git a/helm/metadata/create_V7_mowgli/METADATA/Makefile b/helm/metadata/create_V7_mowgli/METADATA/Makefile index cb7d5d714..27b5fe5e6 100644 --- a/helm/metadata/create_V7_mowgli/METADATA/Makefile +++ b/helm/metadata/create_V7_mowgli/METADATA/Makefile @@ -3,25 +3,23 @@ CC = gcc all: meta meta_ind meta: lex.yy.o sthandler.o - gcc lex.yy.o sthandler.o -o meta + gcc lex.yy.o sthandler.o -lpq -o meta -meta_ind: lex.yy_ind.o sthandler_ind.o - gcc lex.yy_ind.o sthandler_ind.o -o meta_ind +meta_ind: lex.yy_ind.o sthandler.o + gcc lex.yy_ind.o sthandler.o -lpq -o meta_ind lex.yy.c: meta_lex.l sthandler.h flex meta_lex.l -lex.yy_ind.c: meta_lex_ind.l sthandler_ind.h +lex.yy_ind.c: meta_lex_ind.l sthandler.h flex -olex.yy_ind.c meta_lex_ind.l sthandler.o: sthandler.c sthandler.h -sthandler.o: sthandler_ind.c sthandler_ind.h - lex.yy.o: lex.yy.c sthandler.h gcc -c lex.yy.c -lex.yy_ind.o: lex.yy_ind.c sthandler_ind.h +lex.yy_ind.o: lex.yy_ind.c sthandler.h gcc -c lex.yy_ind.c clean: diff --git a/helm/metadata/create_V7_mowgli/METADATA/meta_lex.l b/helm/metadata/create_V7_mowgli/METADATA/meta_lex.l index eca7c383d..2f8c4b039 100644 --- a/helm/metadata/create_V7_mowgli/METADATA/meta_lex.l +++ b/helm/metadata/create_V7_mowgli/METADATA/meta_lex.l @@ -39,6 +39,7 @@ #include #include #include +#include #include "sthandler.h" %} @@ -70,7 +71,7 @@ int found = NOTFOUND; int position = INBODY; int first_child = HERE; int skip = 0; // boolean to skip the insertion of a URI -int no_open_source =0; +int no_open_source = 0; int spine_depth = 0; int depth = 0; int tmp_n; @@ -101,6 +102,10 @@ value [^"]+ ""(" "|\n)*"" | -"" { if (position == INHYP) { no_open_source--; @@ -129,8 +134,9 @@ value [^"]+ position = INTYPE; spine_depth++; depth = 0; + first_child = HERE; } - first_child = HERE; + /* bug? first_child = HERE; */ } @@ -264,54 +270,66 @@ value [^"]+ main(int argc, char *argv[]) { - struct stat buf; - FILE *outrel, *outsort; - - init_symbol_table(); - if (!(outrel = fopen("forward_rel.xml","a"))) - { - fprintf(stderr, "error in openinf file forward_rel.xml\n"); - exit(-1); - } - if (!(outsort = fopen("forward_sort.xml","a"))) - { - fprintf(stderr, "error in openinf file forward_rel.xml\n"); - exit(-1); - } - // We process the body - if (!stat("tmp/body.xml",&buf)) { - yyin = fopen("tmp/body.xml", "r"); - position = INBODY; - yylex(); - fclose(yyin); - } + struct stat buf; + + char *pghost, + *pgport, + *pgoptions, + *pgtty; + char *dbName; + + /* FILE *debug; */ + + PGconn *conn; + PGresult *res; + + /* + * begin, by setting the parameters for a backend connection if the + * parameters are null, then the system will try to use reasonable + * defaults by looking up environment variables or, failing that, + * using hardwired constants + */ + + /* make a connection to the database */ + conn = PQconnectdb("user=helm dbname=mowgli"); + + /* + * check to see that the backend connection was successfully made + */ + if (PQstatus(conn) == CONNECTION_BAD) + { + fprintf(stderr, "Connection to database '%s' failed.\n", dbName); + fprintf(stderr, "%s", PQerrorMessage(conn)); + exit_nicely(conn); + } + + /* debug = fopen("/tmp/trace.out","w"); */ + /* PQtrace(conn, debug); */ + + /* initialize the symbol table */ + init_symbol_table(); + + // We process the body + if (!stat("tmp/body.xml",&buf)) + { + yyin = fopen("tmp/body.xml", "r"); + position = INBODY; + yylex(); + fclose(yyin); + } + + // We process the type + yyin = fopen("tmp/type.xml", "r"); + position = INTYPE; + first_child = HERE; + no_open_source = 0; + spine_depth = 0; + depth = 0; + yylex(); + fclose(yyin); + print_all(argv[1],conn); +} - // We process the type - yyin = fopen("tmp/type.xml", "r"); - position = INTYPE; - first_child = HERE; - no_open_source = 0; - spine_depth = 0; - depth = 0; - yylex(); - - printf("\n\n"); -printf(" - - - - ]>\n"); - - printf("\n"); - printf("\t\n"); - print_all(argv[1],outrel,outsort); - printf("\t\n"); - printf("\n"); - fclose(yyin); - } search(uri,first_child,position,depth) char *uri; @@ -335,8 +353,9 @@ int position; else found = search_bucket(uri,position,depth); - /* if (found == NOTFOUND) - fprintf(stderr,"pos = %d, uri = %s\n", position, uri); */ + /* + if (found == NOTFOUND) + fprintf(stderr,"here = %d, pos = %d, uri = %s\n", first_child,position, uri); */ } /* (first_child == HERE) @@ -361,3 +380,6 @@ int yywrap() { return 1; } + + + diff --git a/helm/metadata/create_V7_mowgli/METADATA/meta_lex_ind.l b/helm/metadata/create_V7_mowgli/METADATA/meta_lex_ind.l index acf862446..ef0abac72 100644 --- a/helm/metadata/create_V7_mowgli/METADATA/meta_lex_ind.l +++ b/helm/metadata/create_V7_mowgli/METADATA/meta_lex_ind.l @@ -39,7 +39,8 @@ #include #include #include -#include "sthandler_ind.h" +#include +#include "sthandler.h" %} /***************************************************************/ @@ -81,11 +82,9 @@ char sep = '"'; char *xpointer = "#xpointer(1/"; char *uri; char *tmp; -char *filename; -char *file_uri; -char *inductive_uri; -char *filename_prefix; -char *file_uri_prefix; +char *source_uri; +char *source_uri_prefix; +PGconn *conn; %} /***************************************************************/ @@ -117,17 +116,13 @@ value [^"]+ } "" { tmp = (char *)malloc(sizeof('a')*128); - strcpy(filename,filename_prefix); - /* fprintf(stderr,"tre"); */ - strcpy(file_uri,file_uri_prefix); - sprintf(tmp,",%d.xml", inductive_type); - /* fprintf(stderr,"quattro"); */ - strcat(filename,tmp); + strcpy(source_uri,source_uri_prefix); sprintf(tmp,"#xpointer(1/%d)", inductive_type); - strcat(file_uri,tmp); + strcat(source_uri,tmp); /* fprintf(stderr,"cinque"); */ free(tmp); - print_file(); + print_all(source_uri,conn); + /* print_file(); */ } "" { tmp = (char *)malloc(sizeof('a')*128); - strcpy(filename,filename_prefix); - strcpy(file_uri,file_uri_prefix); - strcpy(inductive_uri,file_uri_prefix); - sprintf(tmp,",%d,%d.xml", inductive_type,constructor); - strcat(filename,tmp); + strcpy(source_uri,source_uri_prefix); sprintf(tmp,"#xpointer(1/%d/%d)",inductive_type,constructor); - strcat(file_uri,tmp); + strcat(source_uri,tmp); free(tmp); - print_file(); + print_all(source_uri,conn); + /* print_file(); */ } "" | -"" { if (position == INHYP) { no_open_source--; @@ -178,8 +169,8 @@ value [^"]+ position = INTYPE; spine_depth++; depth = 0; + first_child = HERE; } - first_child = HERE; } @@ -313,55 +304,46 @@ value [^"]+ main(int argc, char *argv[]) { - filename = malloc((sizeof('a')*2000)); - file_uri = malloc((sizeof('a')*2000)); - inductive_uri = malloc((sizeof('a')*2000)); - filename_prefix=argv[1]; - file_uri_prefix=argv[2]; - /* fprintf(stderr,"qua"); */ - yyin = fopen("tmp/inductive_type.xml", "r"); - yylex(); -} - -print_file() -{ - FILE *out, *outrel, *outsort; - - if (!(out = fopen(filename,"w"))) - { - fprintf(stderr, "error in openinf file %s\n", filename); - exit(-1); - } - if (!(outrel = fopen("forward_rel.xml","a"))) - { - fprintf(stderr, "error in openinf file forward_rel.xml\n"); - exit(-1); - } - if (!(outsort = fopen("forward_sort.xml","a"))) - { - fprintf(stderr, "error in openinf file forward_rel.xml\n"); - exit(-1); - } - - // We process the type - - fprintf(out,"\n\n"); -fprintf(out," - - - - ]>\n"); - fprintf(out,"\n"); - fprintf(out,"\t\n"); - print_all(file_uri,out,outrel,outsort); - fprintf(out,"\t\n"); - fprintf(out,"\n"); - fclose(out); - fclose(outrel); - fclose(outsort); + struct stat buf; + + char *pghost, + *pgport, + *pgoptions, + *pgtty; + char *dbName; + + /* FILE *debug; */ + + PGresult *res; + + /* + * begin, by setting the parameters for a backend connection if the + * parameters are null, then the system will try to use reasonable + * defaults by looking up environment variables or, failing that, + * using hardwired constants + */ + + /* make a connection to the database */ + conn = PQconnectdb("user=helm dbname=mowgli"); + + /* + * check to see that the backend connection was successfully made + */ + if (PQstatus(conn) == CONNECTION_BAD) + { + fprintf(stderr, "Connection to database '%s' failed.\n", dbName); + fprintf(stderr, "%s", PQerrorMessage(conn)); + exit_nicely(conn); + } + + /* debug = fopen("/tmp/trace.out","w"); */ + /* PQtrace(conn, debug); */ + + source_uri = malloc((sizeof('a')*2000)); + source_uri_prefix=argv[1]; + /* fprintf(stderr,"qua"); */ + yyin = fopen("tmp/inductive_type.xml", "r"); + yylex(); } search(uri,first_child,position,depth) @@ -386,8 +368,8 @@ int position; else found = search_bucket(uri,position,depth); - /* if (found == NOTFOUND) - printf( "pos = %d, uri = %s\n", position, uri); */ + if (found == NOTFOUND) + printf( "pos = %d, uri = %s\n", position, uri); } diff --git a/helm/metadata/create_V7_mowgli/METADATA/sthandler.c b/helm/metadata/create_V7_mowgli/METADATA/sthandler.c index 0f32e07c1..94b87c0c4 100644 --- a/helm/metadata/create_V7_mowgli/METADATA/sthandler.c +++ b/helm/metadata/create_V7_mowgli/METADATA/sthandler.c @@ -42,6 +42,7 @@ /****************************************************************/ #include +#include #include /****************************************************************/ @@ -173,17 +174,15 @@ search_bucket(id, where, depth) /* the identifier is not in the first position */ { prev->next_st_bucket = curr->next_st_bucket; - move_bucket(curr, - dict_index); + move_bucket(curr,dict_index); }; return where; } } -print_all(about,outrel,outsort) +print_all(about,conn) char *about; - FILE *outrel, - *outsort; + PGconn *conn; { int i; @@ -195,11 +194,11 @@ print_all(about,outrel,outsort) if ((curr->pos[i]) == 1) { if (i == MAINHYP) - print_mainhyp(about,outrel,outsort,curr->id,curr->depths); + print_mainhyp(about,conn,curr->id,curr->depths); else if (i == MAINCONCL) - print_mainconcl(about,outrel,outsort,curr->id,curr->main_depth); + print_mainconcl(about,conn,curr->id,curr->main_depth); else - print_one(curr->id,i); + print_one(conn,about,curr->id,i); } curr = curr->all_next; } @@ -210,6 +209,12 @@ print_all(about,outrel,outsort) /* 5. Definitions of functions local to the module. */ /****************************************************************/ +void exit_nicely(PGconn *conn) +{ + PQfinish(conn); + exit(1); +} + struct int_list *add(l,m) struct int_list *l; int m; @@ -232,13 +237,14 @@ struct int_list *add(l,m) } -print_mainhyp(about,outrel,outsort,uri,l) +print_mainhyp(about,conn,uri,l) char *about; - FILE *outrel, - *outsort; + PGconn *conn; char *uri; struct int_list *l; { + PGresult *res; + char *command = (char *)malloc((sizeof('a')*200)); struct int_list *curr; curr = l; if (!strcmp(uri,"Rel")) @@ -246,14 +252,18 @@ print_mainhyp(about,outrel,outsort,uri,l) /* scan the list */ while (curr != NULL) { - fprintf(outrel,"\t\n"); - fprintf(outrel,"\t\t"); - fprintf(outrel,"\n\t\t\t\t"); - fprintf(outrel,"\n\t\t\t\t%d",curr->val); - fprintf(outrel,"\n\t\t\n"); - fprintf(outrel,"\t\n"); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + sprintf(command,"INSERT INTO refRel values ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d)",qabout,curr->val); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "BEGIN command failed\n"); + PQclear(res); + exit_nicely(conn); + } curr = curr->next; } } @@ -263,15 +273,18 @@ print_mainhyp(about,outrel,outsort,uri,l) /* scan the list */ while (curr != NULL) { - fprintf(outsort,"\t\n"); - fprintf(outsort,"\t\t"); - fprintf(outsort,"\n\t\t\t\t"); - fprintf(outsort,"\n\t\t\t\t",uri); - fprintf(outsort,"\n\t\t\t\t%d",curr->val); - fprintf(outsort,"\n\t\t\n"); - fprintf(outsort,"\t\n"); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + sprintf(command,"INSERT INTO refSort values ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d, '%s')",qabout,curr->val,uri); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "BEGIN command failed\n"); + PQclear(res); + exit_nicely(conn); + } curr = curr->next; } } @@ -280,76 +293,114 @@ print_mainhyp(about,outrel,outsort,uri,l) /* scan the list */ while (curr != NULL) { - printf("\t\t"); - printf("\n\t\t\t\t"); - printf("\n\t\t\t\t%d",curr->val); - printf("\n\t\t\t\t",uri); - printf("\n\t\t\n"); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + char *quri; + PQescapeString(qabout,about,len); + len = strlen(uri) + 1; + quri = malloc (sizeof(char) * len * 2); + PQescapeString(quri,uri,len); + sprintf(command,"INSERT INTO refObj values ('%s', '%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d)",qabout,quri,curr->val); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "BEGIN command failed\n"); + PQclear(res); + exit_nicely(conn); + } curr = curr->next; } } } -print_mainconcl(about,outrel,outsort,uri,depth) +print_mainconcl(about,conn,uri,depth) char *about; - FILE *outrel, - *outsort; + PGconn *conn; char *uri; int depth; { + PGresult *res; + char *command = (char *)malloc((sizeof('a')*200)); + /* fprintf(stderr,"about = %s\n",about); */ if (!strcmp(uri,"Rel")) { - fprintf(outrel,"\t\n"); - fprintf(outrel,"\t\t"); - fprintf(outrel,"\n\t\t\t\t"); - fprintf(outrel,"\n\t\t\t\t%d",depth); - fprintf(outrel,"\n\t\t\n"); - fprintf(outrel,"\t\n"); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + sprintf(command,"INSERT INTO refRel values ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d)",qabout,depth); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); } else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) || (!strcmp(uri,"Set"))) { - fprintf(outsort,"\t\n"); - fprintf(outsort,"\t\t"); - fprintf(outsort,"\n\t\t\t\t"); - fprintf(outsort,"\n\t\t\t\t",uri); - fprintf(outsort,"\n\t\t\t\t%d",depth); - fprintf(outsort,"\n\t\t\n"); - fprintf(outsort,"\t\n"); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + PQescapeString(qabout,about,len); + sprintf(command,"INSERT INTO refSort values ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d, '%s')",qabout,depth,uri); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); } else { - printf("\t\t"); - printf("\n\t\t\t\t"); - printf("\n\t\t\t\t%d",depth); - printf("\n\t\t\t\t",uri); - printf("\n\t\t\n"); + size_t len = strlen(about) + 1; + char *qabout = malloc (sizeof(char) * len * 2); + char *quri; + PQescapeString(qabout,about,len); + len = strlen(uri) + 1; + quri = malloc (sizeof(char) * len * 2); + PQescapeString(quri,uri,len); + sprintf(command,"INSERT INTO refObj values ('%s', '%s','http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d)",qabout,quri,depth); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); + } + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "BEGIN command failed\n"); + PQclear(res); + exit_nicely(conn); } + /* fprintf(stderr,"FINITO\n"); */ } // dome: cambiata per usare il modello con position -print_one(uri,pos) - char *uri; +print_one(conn,about,uri,pos) + PGconn *conn; + char *about, + *uri; int pos; { - printf("\t\t"); - printf("\n\t\t\t\t\n\t\t\t\t\n\t\t\n", uri); + position="MainConclusion"; /* This should never happen */ + sprintf(command,"INSERT INTO refObj values ('%s', '%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#%s')",qabout,quri,position); + /* fprintf(stderr, "%s\n", command); */ + res = PQexec(conn, command); + if (!res || PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "BEGIN command failed\n"); + PQclear(res); + exit_nicely(conn); + } } /* The following function allocates a bucket for an identifier. */ diff --git a/helm/metadata/create_V7_mowgli/METADATA/sthandler.h b/helm/metadata/create_V7_mowgli/METADATA/sthandler.h index 5a9cd7143..e8ee4849f 100644 --- a/helm/metadata/create_V7_mowgli/METADATA/sthandler.h +++ b/helm/metadata/create_V7_mowgli/METADATA/sthandler.h @@ -4,5 +4,5 @@ extern void init_symbol_table(); -extern void print_all(char *, FILE *, FILE *); +extern void print_all(char *, PGconn *); extern int search_bucket(char *, int, int); diff --git a/helm/metadata/create_V7_mowgli/Makefile b/helm/metadata/create_V7_mowgli/Makefile index 1397397ee..4f871e46a 100644 --- a/helm/metadata/create_V7_mowgli/Makefile +++ b/helm/metadata/create_V7_mowgli/Makefile @@ -3,11 +3,8 @@ all: @echo " forward, backward, compress, clean-forward, clean-backward" forward: - time for i in `cat costanti_e_variabili.txt` ; do (cd tmp && wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i" && wget -t 1 -O downloaded_body.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i.body") ; mkdir -p forward/`dirname $$i | sed "s/cic:\///"` && zcat tmp/downloaded.xml.gz > tmp/type.xml && zcat tmp/downloaded_body.xml.gz > tmp/body.xml ; METADATA/meta $$i > forward/`echo $$i | sed "s/cic:\///"`.xml && rm -f tmp/downloaded.xml.gz tmp/downloaded_body.xml.gz tmp/type.xml tmp/body.xml ; done > log 2>&1 - time for i in `cat tipi_induttivi.txt` ; do (cd tmp && wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i") && mkdir -p forward/`dirname $$i | sed "s/cic:\///"` && zcat tmp/downloaded.xml.gz > tmp/inductive_type.xml && METADATA/meta_ind forward/`echo $$i | sed "s/cic:\///"` $$i && rm tmp/downloaded.xml.gz tmp/inductive_type.xml ; done >> log 2>&1 - (cd forward ; ../mkindex.sh forward) - cat forward_rel_or_sort_header.txt forward_rel.xml forward_rel_or_sort_footer.txt > forward_rel.xml.tmp && mv forward_rel.xml.tmp forward_rel.xml - cat forward_rel_or_sort_header.txt forward_sort.xml forward_rel_or_sort_footer.txt > forward_sort.xml.tmp && mv forward_sort.xml.tmp forward_sort.xml + time for i in `cat tipi_induttivi.txt` ; do (cd tmp ; wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i") ; zcat tmp/downloaded.xml.gz > tmp/inductive_type.xml ; METADATA/meta_ind $$i ; rm -f tmp/downloaded.xml.gz tmp/inductive_type.xml; done > log 2>&1 + time for i in `cat costanti_e_variabili.txt` ; do (cd tmp ; wget -t 1 -O downloaded.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i" ; wget -t 1 -O downloaded_body.xml.gz "http://mowgli.cs.unibo.it:58081/getxml?format=gz&uri=$$i.body"); zcat tmp/downloaded.xml.gz > tmp/type.xml ; zcat tmp/downloaded_body.xml.gz > tmp/body.xml ; METADATA/meta $$i ; rm -f tmp/downloaded.xml.gz tmp/downloaded_body.xml.gz tmp/type.xml tmp/body.xml ; done > log 2>&1 backward: time for i in `cat all_objects.txt` ; do touch/touch.opt $$i ; done