--- /dev/null
+# Compiler used by every rule below, including make's implicit %.o rule.
+CC = gcc
+
+# Link the metadata extractor from the generated scanner and the symbol table.
+meta: lex.yy.o sthandler.o
+	$(CC) lex.yy.o sthandler.o -o meta
+
+# Regenerate the scanner source from the flex specification.
+lex.yy.c: meta_lex.l sthandler.h
+	flex meta_lex.l
+
+# No recipe: built by make's implicit rule for C sources, which uses $(CC).
+sthandler.o: sthandler.c sthandler.h
+
+lex.yy.o: lex.yy.c sthandler.h
+	$(CC) -c lex.yy.c
+
+# rm -f stays silent (and succeeds) when a file is already gone.
+clean:
+	rm -f *.o lex.yy.c meta
+
+.PHONY: clean
--- /dev/null
+ /******************************************************************/
+ /* Copyright (C) 2000, HELM Team */
+ /* */
+ /* This file is part of HELM, an Hypertextual, Electronic */
+ /* Library of Mathematics, developed at the Computer Science */
+ /* Department, University of Bologna, Italy. */
+ /* */
+ /* HELM is free software; you can redistribute it and/or */
+ /* modify it under the terms of the GNU General Public License */
+ /* as published by the Free Software Foundation; either version */
+ /* 2 of the License, or (at your option) any later version. */
+ /* */
+ /* HELM is distributed in the hope that it will be useful, */
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+ /* GNU General Public License for more details. */
+ /* */
+ /* You should have received a copy of the GNU General Public */
+ /* License along with HELM; if not, write to the Free Software */
+ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, */
+ /* MA 02111-1307, USA. */
+ /* */
+ /* For details, see the HELM World-Wide-Web page, */
+ /* http://cs.unibo.it/helm/. */
+ /******************************************************************/
+
+ /***************************************************************/
+ /* META_LEXAN */
+ /* Automatic Metadata Extractor */
+ /* First draft 11/12/2001, by Andrea Asperti */
+ /***************************************************************/
+
+ /***************************************************************/
+ /* 1. Inclusion of header files. */
+ /***************************************************************/
+
+%{
+#include <string.h>
+#include <stdlib.h>
+#include "sthandler.h"
+%}
+
+ /***************************************************************/
+ /* 2. Constants and Variables Definitions */
+ /***************************************************************/
+
+%{
+/* Kind of element whose attributes are currently being read (see where). */
+enum {
+    NOWHERE      = 0,
+    CONST        = 1,
+    MUTIND       = 2,
+    MUTCONSTRUCT = 3
+};
+
+/* Occurrence positions; the same codes are defined again in sthandler.c. */
+enum {
+    INBODY    = 0,
+    MAINHYP   = 1,
+    INHYP     = 2,
+    INCONCL   = 3,
+    MAINCONCL = 4,
+    INTYPE    = 5,
+    NOTFOUND  = 6
+};
+
+/* Progress of first_child: not reached yet, current element, already passed. */
+enum {
+    BEFORE = 0,
+    HERE   = 1,
+    AFTER  = 2
+};
+
+/* Set by the <CONST, <MUTIND and <MUTCONSTRUCT rules, NOWHERE otherwise. */
+int where = NOWHERE;
+/* Last value returned by search_bucket(), stored by search(). */
+int found = NOTFOUND;
+/* Part of the object the scanner is currently inside. */
+int position = INBODY;
+/* One of BEFORE, HERE, AFTER; updated by the element rules. */
+int first_child = BEFORE;
+/* Number of <source elements opened and not yet closed. */
+int no_open_source = 0;
+/* Scratch integer for attribute values. */
+int tmp_n;
+/* The double-quote character that delimits attribute values. */
+char sep = '"';
+/* Prefix of the fragment appended to inductive-type URIs. */
+char *xpointer = "#xpointer(1/";
+/* URI of the reference currently being assembled. */
+char *uri;
+/* Scratch string for attribute values. */
+char *tmp;
+%}
+
+ /***************************************************************/
+ /* 3. Regular definitions. */
+ /***************************************************************/
+
+ /* uri: one or more characters other than a double quote, so a match */
+ /* stops just before the closing quote of an attribute value. */
+uri [^"]+
+ /* digits: a non-empty run of decimal digits. */
+digits [0-9]+
+
+ /***************************************************************/
+ /* 4. Rules. */
+ /***************************************************************/
+
+
+
+%%
+
+"<type>" {
+    /* The type starts: no term element has been met in it yet. */
+    first_child = BEFORE;
+    position = INTYPE;
+}
+
+"<source" {
+    /* A source opened on the spine of the type, or nested in another */
+    /* source, is part of a hypothesis; keep count of the nesting. */
+    switch (position) {
+    case INTYPE:
+    case INHYP:
+        position = INHYP;
+        ++no_open_source;
+        break;
+    default:
+        break;
+    }
+}
+
+"</source>" {
+    /* Closing the outermost source puts the scanner back on the spine */
+    /* of the type, where the next element is again a first child. */
+    if (position == INHYP && --no_open_source == 0) {
+        first_child = BEFORE;
+        position = INTYPE;
+    }
+}
+
+
+"<body>" {
+    /* Everything from here on is recorded as occurring in the body. */
+    position = INBODY;
+}
+
+.|\n {
+    /* Discard every character that no other rule claims. */
+}
+
+"<APPLY" {
+    /* An application advances first_child by one step, up to AFTER. */
+    switch (first_child) {
+    case BEFORE:
+        first_child = HERE;
+        break;
+    case HERE:
+        first_child = AFTER;
+        break;
+    default:
+        break;
+    }
+}
+
+"<LAMBDA" |
+"<REL" |
+"<MUTCASE" |
+"<FIX" |
+"<COFIX" {
+    /* After any of these elements nothing counts as first child. */
+    switch (first_child) {
+    case BEFORE:
+    case HERE:
+        first_child = AFTER;
+        break;
+    default:
+        break;
+    }
+}
+
+"<CONST" {
+    /* The attributes that follow belong to a constant. */
+    where = CONST;
+    /* A constant met directly in the type lies in the conclusion. */
+    if (position == INTYPE)
+        position = INCONCL;
+    if (first_child == BEFORE)
+        first_child = HERE;
+}
+
+"<MUTIND" {
+    /* The attributes that follow belong to an inductive type. */
+    where = MUTIND;
+    /* An inductive type met directly in the type lies in the conclusion. */
+    if (position == INTYPE)
+        position = INCONCL;
+    if (first_child == BEFORE)
+        first_child = HERE;
+}
+
+"<MUTCONSTRUCT" {
+    /* The attributes that follow belong to a constructor. */
+    where = MUTCONSTRUCT;
+    /* A constructor met directly in the type lies in the conclusion. */
+    if (position == INTYPE)
+        position = INCONCL;
+    if (first_child == BEFORE)
+        first_child = HERE;
+}
+
+"uri=\""{uri} {
+    /* URI attribute. It is only of interest right after a <CONST, */
+    /* <MUTIND or <MUTCONSTRUCT tag; elsewhere nothing is allocated. */
+    /* The global uri always points at the start of its own heap block, */
+    /* or is NULL, so that it can be grown and freed safely. */
+    if (where != NOWHERE) {
+        /* the value starts after the equals sign and the opening quote */
+        const char *value = strchr(yytext, '=') + 2;
+        size_t size = strlen(value) + 1;
+
+        free(uri); /* a reference left unfinished by malformed input */
+        uri = malloc(size);
+        if (uri == NULL) {
+            fprintf(stderr, "meta: out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        memcpy(uri, value, size);
+
+        if (where == CONST) {
+            /* a constant is fully identified by its URI */
+            search(uri, first_child, position);
+            free(uri);
+            uri = NULL;
+            where = NOWHERE;
+            first_child = AFTER;
+        }
+    }
+}
+
+"noType=\""{digits} {
+    /* Index of the inductive type: appended to the URI, incremented */
+    /* by one, after the xpointer prefix. A MUTIND reference is complete */
+    /* at this point; a MUTCONSTRUCT still waits for noConstr. */
+    if ((where == MUTIND || where == MUTCONSTRUCT) && uri != NULL) {
+        const char *number = strchr(yytext, '=') + 2;
+        unsigned long type_no = strtoul(number, NULL, 10) + 1;
+        size_t used = strlen(uri);
+        /* prefix + at most 20 digits + closing parenthesis + NUL */
+        size_t room = strlen(xpointer) + 32;
+        char *grown = realloc(uri, used + room);
+
+        if (grown == NULL) {
+            fprintf(stderr, "meta: out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        uri = grown;
+        snprintf(uri + used, room, "%s%lu%s", xpointer, type_no,
+                 (where == MUTIND) ? ")" : "");
+
+        if (where == MUTIND) {
+            search(uri, first_child, position);
+            free(uri);
+            uri = NULL;
+            where = NOWHERE;
+            first_child = AFTER;
+        }
+    }
+}
+
+"noConstr=\""{digits} {
+    /* Index of the constructor: appended verbatim after a slash, */
+    /* then the fragment is closed and the reference recorded. */
+    if (where == MUTCONSTRUCT && uri != NULL) {
+        const char *number = strchr(yytext, '=') + 2;
+        size_t used = strlen(uri);
+        /* slash + digits + closing parenthesis + NUL */
+        size_t room = strlen(number) + 3;
+        char *grown = realloc(uri, used + room);
+
+        if (grown == NULL) {
+            fprintf(stderr, "meta: out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        uri = grown;
+        snprintf(uri + used, room, "/%s)", number);
+
+        search(uri, first_child, position);
+        free(uri);
+        uri = NULL;
+        where = NOWHERE;
+        first_child = AFTER;
+    }
+}
+
+
+
+%%
+
+ /***************************************************************/
+ /* 5. Auxiliary functions. */
+ /***************************************************************/
+
+/*
+ * Entry point. Scans the object read by yylex(), then prints an RDF
+ * document describing it on standard output.
+ *
+ * argv[1] is the identifier written into the rdf:about attribute of the
+ * h:Object element; it is mandatory.
+ *
+ * Returns 0 on success, EXIT_FAILURE when the argument is missing.
+ */
+int main(int argc, char *argv[])
+{
+    if (argc < 2) {
+        fprintf(stderr, "usage: meta <object-uri> < object.xml\n");
+        return EXIT_FAILURE;
+    }
+
+    init_symbol_table();
+    yylex();
+
+    printf("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\n");
+    /* NOTE(review): the h namespace below reads "http:/www" (one slash)
+       while print_one() in sthandler.c emits "http://www"; left as is
+       because consumers of the output may match the exact text. */
+    printf("<rdf:RDF xml:lang=\"en\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:h=\"http:/www.cs.unibo.it/helm/schemas/schema-h.rdf#\">\n");
+    printf("<h:Object rdf:about=\"");
+    printf("%s", argv[1]);
+    printf("\">\n");
+    print_all();
+    printf("</h:Object>\n");
+    printf("</rdf:RDF>\n");
+    return 0;
+}
+
+/*
+ * Records one occurrence of uri in the symbol table.
+ *
+ * uri          identifier of the referenced object
+ * first_child  BEFORE, HERE or AFTER; HERE marks the head occurrence
+ * position     part of the object in which the reference was met
+ *
+ * A head occurrence in a hypothesis or in the conclusion is recorded as
+ * MAINHYP or MAINCONCL respectively. Every other occurrence, including a
+ * head occurrence met elsewhere (for instance in the body), is recorded
+ * under position itself instead of being dropped.
+ *
+ * Stores the result of search_bucket() in the global found and returns it.
+ */
+int search(char *uri, int first_child, int position)
+{
+    int recorded_as = position;
+
+    if (first_child == HERE) {
+        if (position == INHYP)
+            recorded_as = MAINHYP;
+        else if (position == INCONCL)
+            recorded_as = MAINCONCL;
+    }
+
+    found = search_bucket(uri, recorded_as);
+    return found;
+}
+
+/* Called by the scanner at end of input; a non-zero result means that
+   no further input follows. */
+int yywrap()
+{
+    const int no_more_input = 1;
+
+    return no_more_input;
+}
+
+
+
+
+
+
+
--- /dev/null
+/*********************************************************************/
+/* Copyright (C) 2000, HELM Team */
+/* */
+/* This file is part of HELM, an Hypertextual, Electronic */
+/* Library of Mathematics, developed at the Computer Science */
+/* Department, University of Bologna, Italy. */
+/* */
+/* HELM is free software; you can redistribute it and/or */
+/* modify it under the terms of the GNU General Public License */
+/* as published by the Free Software Foundation; either version 2 */
+/* of the License, or (at your option) any later version. */
+/* */
+/* HELM is distributed in the hope that it will be useful, */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* */
+/* You should have received a copy of the GNU General Public License */
+/* along with HELM; if not, write to the Free Software */
+/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, */
+/* MA 02111-1307, USA. */
+/* */
+/* For details, see the HELM World-Wide-Web page, */
+/* http://cs.unibo.it/helm/. */
+ /*********************************************************************/
+
+/****************************************************************/
+/* STHANDLER.C */
+/****************************************************************/
+/* This module supplies routines for symbol table handling. */
+/* - init_symbol_table(): it initializes the symbol table */
+/* to empty. */
+/* - search_bucket(): it searches the symbol table for the */
+/* bucket containing a given identifier, and */
+/* inserts it if it is not present; */
+/****************************************************************/
+/* First draft 11/12/2001, by Andrea Asperti */
+/****************************************************************/
+
+/****************************************************************/
+/* 1. Inclusion of header files. */
+/****************************************************************/
+
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/****************************************************************/
+/* 2. Declarations */
+/****************************************************************/
+
+
+/* Number of hash chains in the dictionary. */
+#define DICTSIZE 211
+/* Shift, mask and fold distance used by hash_pjw(). */
+#define HASH1 4
+#define HASH2 0xf0000000
+#define HASH3 24
+/* String terminator. */
+#define EOS '\0'
+
+/* Occurrence positions; codes 0 to 4 index the pos array of a bucket. */
+enum {
+    INBODY    = 0,
+    MAINHYP   = 1,
+    INHYP     = 2,
+    INCONCL   = 3,
+    MAINCONCL = 4,
+    INTYPE    = 5,
+    NOTFOUND  = 6
+};
+
+/****************************************************************/
+/* 3. Types. */
+/****************************************************************/
+
+/* One symbol table entry: an identifier plus the positions it was seen in. */
+struct st_bucket {
+    char *id;                          /* the identifier itself */
+    struct st_bucket *next_st_bucket;  /* next entry on the same hash chain */
+    struct st_bucket *all_next;        /* next entry on the list of all entries */
+    int pos[5];                        /* pos[p] is 1 when position p was recorded */
+};
+
+/* Heads of the hash chains, indexed by the value of hash_pjw(). */
+struct st_bucket *dictionary[DICTSIZE];
+ /* pointers to bucket lists */
+
+/****************************************************************/
+/* 4. Definitions of functions to be exported. */
+/****************************************************************/
+
+/* Head of the list linking every bucket through its all_next field. */
+struct st_bucket *all;
+
+/*
+ * Initializes the symbol table to empty: every hash chain and the list of
+ * all buckets are set to NULL. Buckets that may already exist are not
+ * freed.
+ */
+void init_symbol_table(void)
+{
+    int i;
+
+    for (i = 0; i < DICTSIZE; i++)
+        dictionary[i] = NULL;
+    all = NULL;
+}
+
+/* Helpers defined further down in this file. */
+int hash_pjw(char *id);
+int allocate_bucket(struct st_bucket **st, char *id, int where);
+int move_bucket(struct st_bucket *st, int dict_index);
+
+/*
+ * Searches the symbol table for an identifier and inserts it if it is
+ * not present. In both cases the bucket of the identifier ends up first
+ * in its hash chain.
+ *
+ * id     identifier to look up
+ * where  position code to record; must be a valid index of pos[]
+ *
+ * Returns NOTFOUND when the identifier was not in the table (a bucket is
+ * then created with only position where set) or when where is not a
+ * valid index, in which case nothing is recorded. Otherwise marks
+ * position where on the existing bucket and returns where. Recording any
+ * position other than 0 clears position 0 of that bucket.
+ */
+int search_bucket(char *id, int where)
+{
+    struct st_bucket *prev = NULL;
+    struct st_bucket *curr;
+    struct st_bucket *st;
+    int dict_index;
+
+    /* codes such as INTYPE and NOTFOUND lie outside the pos array */
+    if (where < 0 || where >= (int)(sizeof st->pos / sizeof st->pos[0]))
+        return NOTFOUND;
+
+    dict_index = hash_pjw(id);
+
+    /* walk the chain selected by the hash function */
+    for (curr = dictionary[dict_index];
+         curr != NULL && strcmp(id, curr->id) != 0;
+         curr = curr->next_st_bucket)
+        prev = curr;
+
+    if (curr == NULL) {
+        /* the identifier is not in the chain: create it at the head */
+        allocate_bucket(&st, id, where);
+        move_bucket(st, dict_index);
+        return NOTFOUND;
+    }
+
+    /* the identifier is already in the chain */
+    curr->pos[where] = 1;
+    if (where >= 1)
+        curr->pos[0] = 0;
+
+    if (prev != NULL) {
+        /* not in first position: unlink it, then push it at the head */
+        prev->next_st_bucket = curr->next_st_bucket;
+        move_bucket(curr, dict_index);
+    }
+    return where;
+}
+
+/* Helper defined further down in this file. */
+int print_one(char *uri, int pos);
+
+/*
+ * Prints, for every bucket on the list headed by all, one occurrence
+ * element for each position whose flag is set to 1.
+ */
+void print_all(void)
+{
+    struct st_bucket *curr;
+    int i;
+
+    for (curr = all; curr != NULL; curr = curr->all_next)
+        for (i = 0; i < 5; ++i)
+            if (curr->pos[i] == 1)
+                print_one(curr->id, i);
+}
+
+
+/****************************************************************/
+/* 5. Definitions of functions local to the module. */
+/****************************************************************/
+
+/*
+ * Prints one h:refObj element on standard output.
+ *
+ * uri  value written into the rdf:value attribute
+ * pos  position code; codes INBODY to MAINCONCL select the name appended
+ *      to the schema address, any other code appends nothing
+ *
+ * Always returns 0.
+ */
+int print_one(char *uri, int pos)
+{
+    static const char *const occurrence_name[] = {
+        [INBODY]    = "InBody",
+        [MAINHYP]   = "MainHypothesis",
+        [INHYP]     = "InHypothesis",
+        [INCONCL]   = "InConclusion",
+        [MAINCONCL] = "MainConclusion"
+    };
+    const int known = (int)(sizeof occurrence_name / sizeof occurrence_name[0]);
+
+    printf("<h:refObj>\n");
+    printf("<h:Occurrence rdf:about=\"http://www.cs.unibo.it/helm/schemas/schema-h.rdf#");
+    if (pos >= 0 && pos < known)
+        printf("%s", occurrence_name[pos]);
+    printf("\" rdf:value=\"");
+    printf("%s", uri);
+    printf("\"/>\n");
+    printf("</h:refObj>\n");
+    return 0;
+}
+
+/*
+ * Allocates a bucket for an identifier and links it at the head of the
+ * list of all buckets. The bucket is not put on any hash chain here.
+ *
+ * st     receives the address of the new bucket
+ * id     identifier; a private copy is stored in the bucket
+ * where  position to mark; a value that is not a valid index of pos[]
+ *        leaves every position unmarked
+ *
+ * Terminates the program when memory is exhausted. Always returns 0.
+ */
+int allocate_bucket(struct st_bucket **st, char *id, int where)
+{
+    struct st_bucket *bucket;
+    size_t id_size = strlen(id) + 1; /* room for the terminator too */
+    int slots;
+    int i;
+
+    bucket = malloc(sizeof *bucket);
+    if (bucket != NULL)
+        bucket->id = malloc(id_size);
+    if (bucket == NULL || bucket->id == NULL) {
+        fprintf(stderr, "sthandler: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(bucket->id, id, id_size);
+
+    slots = (int)(sizeof bucket->pos / sizeof bucket->pos[0]);
+    for (i = 0; i < slots; ++i)
+        bucket->pos[i] = 0;
+    if (where >= 0 && where < slots)
+        bucket->pos[where] = 1;
+
+    bucket->next_st_bucket = NULL;
+    bucket->all_next = all;
+    all = bucket;
+
+    *st = bucket;
+    return 0;
+}
+
+/*
+ * Puts a bucket at the head of a hash chain.
+ *
+ * st          bucket to place first; it must not currently be linked in
+ *             the chain, since this function does not unlink it
+ * dict_index  index of the chain in dictionary
+ *
+ * Always returns 0.
+ */
+int move_bucket(struct st_bucket *st, int dict_index)
+{
+    st->next_st_bucket = dictionary[dict_index];
+    dictionary[dict_index] = st;
+    return 0;
+}
+
+/*
+ * Weinberger's (PJW) hash function.
+ *
+ * id  NUL-terminated identifier to be hashed
+ *
+ * Returns an index in the range 0 to DICTSIZE - 1. Characters are read
+ * as unsigned values so that the result does not depend on whether plain
+ * char is signed.
+ */
+int hash_pjw(char *id)
+{
+    unsigned h = 0;
+    unsigned g;
+
+    for (; *id != EOS; id++) {
+        h = (h << HASH1) + (unsigned char)*id;
+        /* fold the bits selected by HASH2 back into the low part */
+        g = h & HASH2;
+        if (g != 0)
+            h = h ^ (g >> HASH3) ^ g;
+    }
+    return (int)(h % DICTSIZE);
+}
+
+
+
+
+
--- /dev/null
+/****************************************************************/
+/* STHANDLER.H */
+/****************************************************************/
+
+
+extern void init_symbol_table();
+extern void print_all();
+extern int search_bucket();
--- /dev/null
+# Default target: only lists the targets that do real work.
+all:
+ @echo Available targets:
+ @echo " forward, backward, compress, clean-forward, clean-backward"
+
+# For every URI listed in the file pluto: download the gzipped XML into tmp,
+# create the matching directory under forward, pipe the decompressed XML
+# through METADATA/meta into the output file, then delete the download.
+# All loop output goes to the file log; the index is rebuilt afterwards.
+forward:
+ time for i in `cat pluto` ; do (cd tmp ; wget -t 1 "http://phd.cs.unibo.it:8081/getxml?format=gz&uri=$$i") ; mkdir -p forward/`dirname $$i | sed "s/cic:\///"` ; zcat tmp/`basename $$i` | METADATA/meta `basename $$i` > forward/`echo $$i | sed "s/cic:\///"` ; rm tmp/`basename $$i` ; done > log 2>&1
+ (cd forward ; ../mkindex.sh forward)
+
+# Runs touch/touch.opt on every URI in pluto, then invert.pl on every file
+# under forward and fix_rdf.pl on every file under backward, and finally
+# rebuilds the backward index.
+backward:
+ time for i in `cat pluto` ; do touch/touch.opt $$i ; done
+ find forward -type f -exec ./invert.pl {} \;
+ find backward -type f -exec ./fix_rdf.pl {} \;
+ (cd backward ; ../mkindex.sh backward)
+
+# Gzips every .xml file in both trees and rebuilds both indexes.
+compress:
+ find forward -name "*.xml" -exec gzip {} \;
+ find backward -name "*.xml" -exec gzip {} \;
+ (cd forward ; ../mkindex.sh forward)
+ (cd backward ; ../mkindex.sh backward)
+
+# Empties the forward tree.
+clean-forward:
+ rm -rf forward/*
+
+# Empties the backward tree.
+clean-backward:
+ rm -rf backward/*
+
+.PHONY: all forward backward compress clean-forward clean-backward