]> matita.cs.unibo.it Git - helm.git/blob - helm/metadata/extractor/sthandler.c
The metadata extractor now generates also
[helm.git] / helm / metadata / extractor / sthandler.c
1 /*********************************************************************/
2 /*  Copyright (C) 2000, HELM Team                                    */ 
3 /*                                                                   */
4 /* This file is part of HELM, an Hypertextual, Electronic            */
5 /* Library of Mathematics, developed at the Computer Science         */
6 /* Department, University of Bologna, Italy.                         */
7 /*                                                                   */
8 /* HELM is free software; you can redistribute it and/or             */
9 /* modify it under the terms of the GNU General Public License       */
10 /* as published by the Free Software Foundation; either version 2    */
11 /* of the License, or (at your option) any later version.            */
12 /*                                                                   */
13 /* HELM is distributed in the hope that it will be useful,           */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of    */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the     */
16 /* GNU General Public License for more details.                      */
17 /*                                                                   */
18 /* You should have received a copy of the GNU General Public License */
19 /* along with HELM; if not, write to the Free Software               */
20 /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,            */
21 /* MA  02111-1307, USA.                                              */
22 /*                                                                   */
23 /* For details, see the HELM World-Wide-Web page,                    */
24 /* http://cs.unibo.it/helm/.                                         */
25  /*********************************************************************/
26
27 /****************************************************************/
28 /*                        STHANDLER.C                           */
29 /****************************************************************/
30 /* This module supplies routines for symbol table handling.     */
31 /* - init_symbol_table(): it initializes the symbol table       */
/*                        to empty.                             */
33 /* - search_bucket(): it searches the symbol table for the      */
34 /*                    bucket containing a given identifier, and */
35 /*                    inserts it if it is not present;          */
36 /****************************************************************/
37 /*           First draft 11/12/2001, by Andrea Asperti          */
38 /****************************************************************/
39
40 /****************************************************************/
41 /* 1. Inclusion of header files.                                */
42 /****************************************************************/
43
#include                <malloc.h>
#include                <stdio.h>
#include                <stdlib.h>
#include                <string.h>
47
48 /****************************************************************/
49 /* 2. Declarations                                              */
50 /****************************************************************/
51
52
53 #define         DICTSIZE                        211
54 #define         HASH1                           4
55 #define         HASH2                           0xf0000000
56 #define         HASH3                           28
57 #define         EOS                             '\0'
58
59 #define                 INBODY    0
60 #define                 MAINHYP   1
61 #define                 INHYP     2
62 #define                 INCONCL   3
63 #define                 MAINCONCL 4
64 #define                 INTYPE    5
65 #define                 NOTFOUND  6
66
67 /****************************************************************/
68 /* 3. Types.                                                    */
69 /****************************************************************/
70
71 struct int_list {
72                 int                     val;
73                 struct int_list         *next;
74                   }; 
75
76 struct st_bucket {
77                 char                    *id;
78                                                 /* identifier */
79                 int                     main_depth;
80                 struct int_list         *depths;
81                 struct st_bucket        *next_st_bucket;
82                                                 /* next bucket in the list */
83                 struct st_bucket        *all_next;
84                                                /* all buckets in symbol
85                                                   table are linked together */
86                 int                     pos[5];
87
88                   };                              
89
90 struct st_bucket    *dictionary[DICTSIZE];
91                                /* pointers to bucket lists */
92
93 /****************************************************************/
94 /* 4. Local functions.                                          */
95 /****************************************************************/
96 struct int_list  *add(struct int_list  *,int);
97 void allocate_bucket(struct st_bucket **st, char *id, int where);
98 void print_mainhyp(char *about, char *uri,struct int_list *l);
99 void print_mainconcl(char *about, char *uri, int depth);
100 void move_bucket(struct st_bucket *st, int dict_index);
101 void print_one(char *about, char *uri, int pos);
102 int hash_pjw(char *id);
103
104 /* This function is copied from the file fe-exec.c of PostgreSQL. */
105 /* Copyright (c) 1996-2003, PostgreSQL Global Development Group   */
106 /* Copyright (c) 1994, Regents of the University of California    */
107 size_t
108 PQescapeString(char *to, const char *from, size_t length)
109 {
110   const char *source = from;
111   char     *target = to;
112   size_t    remaining = length;
113                                                                                 
114   while (remaining > 0 && *source != '\0')
115   {
116     switch (*source)
117     {
118       case '\\':
119         *target++ = '\\';
120         *target++ = '\\';
121         break;
122                                                                                 
123       case '\'':
124         *target++ = '\'';
125         *target++ = '\'';
126         break;
127                                                                                 
128       default:
129         *target++ = *source;
130         break;
131     }
132     source++;
133     remaining--;
134   }
135
136   /* Write the terminating NUL character. */
137   *target = '\0';
138                                                                                 
139   return target - to;
140 }
141
142
143 /****************************************************************/
144 /* 5. Definitions of functions to be exported.                  */
145 /****************************************************************/
146
147 struct st_bucket        *all;
148
149  /* The following function initializes the symbol table to NULL */
150 void init_symbol_table()
151 {
152         int                     i;
153
154         /* initialize the dictionary */
155         for (i = 0; i < DICTSIZE; i++)
156                 dictionary[i] = NULL;
157         all = NULL;
158 }
159
160  /* The following function searches the symbol table for an identifier */
161  /* and inserts it if it is not present. */
162  /* The bucket associated with the given identifier */
163  /* becomes the first one in its list. */
164
165 int search_bucket(id, where, depth)
166         char            *id;
167                                         /* identifier */
168         int             where;
169         int             depth;
170 {
171         int             dict_index;
172                                         /* value returned by the */
173                                         /* hash function */
174         struct st_bucket
175                         *prev,
176                         *curr;
177
178         struct st_bucket *st;
179
180         /* apply the hash function */
181         dict_index = hash_pjw(id); 
182         /* fprintf(stderr,"%d\n", dict_index); */
183         
184         /* scan the bucket list indicated by the hash function */
185         prev = curr = dictionary[dict_index];
186         while ((curr != NULL) && (strcmp(id, curr->id)))
187           {
188             prev = curr;
189             curr = curr->next_st_bucket;
190           }
191         if (curr == NULL)
192           /* the identifier is not in the list */
193           {
194             allocate_bucket(&st,id,where);
195             if (where == MAINCONCL)
196               st->main_depth = depth;
197             else if (where == MAINHYP)
198               st->depths = add(st->depths,depth);
199             move_bucket(st,dict_index);
200             return NOTFOUND;
201           }
202         else
203           /*
204              printf("uno=%s\n", id);
205              printf("st=%s\n", curr->id); fflush(stdout) */
206
207           /* the identifier is already in the list */
208           {
209             /* st = curr; */
210             curr->pos[where] = 1;
211             if (where >= 1) 
212               curr->pos[INBODY] = 0; /* it will never be set again to 1 */
213             if (where == MAINHYP)
214               curr->depths=add(curr->depths,depth); 
215             else if (where == MAINCONCL)
216               curr->main_depth = depth; 
217             if (prev != curr)
218               /* the identifier is not in the first position */
219               {
220                 prev->next_st_bucket = curr->next_st_bucket;
221                 move_bucket(curr,dict_index);
222               };
223             return where;
224           }
225 }
226
227 void print_all(about,conn)
228      char       *about;
229 {
230
231         int i;
232         struct st_bucket *curr;
233         curr = all;
234         while (curr != NULL)
235           {
236             for (i = 0; i < 5; ++i)
237               if ((curr->pos[i]) == 1)
238                 {
239                   if (i == MAINHYP)
240                     print_mainhyp(about,curr->id,curr->depths);
241                   else if (i == MAINCONCL)
242                     print_mainconcl(about,curr->id,curr->main_depth);
243                   else
244                     print_one(about,curr->id,i);
245                 }
246             curr = curr->all_next;
247           }
248 }
249
250 void print_name(char *name, char *uri)
251 {
252   printf("INSERT INTO objectName VALUES ('%s', '%s');\n",uri,name);
253 }
254
255 /****************************************************************/
/* 6. Definitions of functions local to the module.             */
257 /****************************************************************/
258
259 struct int_list  *add(l,m)
260      struct int_list    *l;
261      int                m;
262 {
263         struct int_list *curr;
264         /* scan the list looking for m */
265         curr = l;
266         while ((curr != NULL) && (m != (curr->val)))
267             curr = curr->next;
268         if (curr == NULL)
269           /* m is not in the list */
270           {
271             curr = (struct int_list *)malloc(sizeof(struct int_list));
272             curr->val = m;
273             curr->next = l;
274             return curr;
275           }
276         else
277         return l;
278        
279 }
280
281 void print_mainhyp(about,uri,l)
282      char                *about;
283      char                *uri;
284      struct int_list     *l;
285 {
286     struct int_list *curr;
287     curr = l;
288     if (!strcmp(uri,"Rel"))
289       {
290         /* scan the list */
291         while (curr != NULL)
292           {
293             size_t len = strlen(about) + 1;
294             char *qabout = malloc (sizeof(char) * len * 2);
295             PQescapeString(qabout,about,len);
296             printf("INSERT INTO refRel VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d);\n",qabout,curr->val);
297             curr = curr->next;
298           }
299       }
300    else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) ||
301             (!strcmp(uri,"Set")))
302       {
303         /* scan the list */
304         while (curr != NULL)
305           {
306             size_t len = strlen(about) + 1;
307             char *qabout = malloc (sizeof(char) * len * 2);
308             PQescapeString(qabout,about,len);
309             printf("INSERT INTO refSort VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d, '%s');\n",qabout,curr->val,uri);
310             curr = curr->next;
311           }
312       }
313     else 
314      {
315         /* scan the list */
316         while (curr != NULL)
317           {
318             size_t len = strlen(about) + 1;
319             char *qabout = malloc (sizeof(char) * len * 2);
320             char *quri;
321             PQescapeString(qabout,about,len);
322             len = strlen(uri) + 1;
323             quri = malloc (sizeof(char) * len * 2);
324             PQescapeString(quri,uri,len);
325             printf("INSERT INTO refObj VALUES ('%s', '%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis', %d);\n",qabout,quri,curr->val);
326             curr = curr->next;
327           }
328       }
329 }
330
331 void print_mainconcl(about,uri,depth)
332      char    *about;
333      char    *uri;
334      int     depth;
335      
336 {
337     /* fprintf(stderr,"about = %s\n",about); */
338     if (!strcmp(uri,"Rel"))
339       { 
340         size_t len = strlen(about) + 1;
341         char *qabout = malloc (sizeof(char) * len * 2);
342         PQescapeString(qabout,about,len);
343         printf("INSERT INTO refRel VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d);\n",qabout,depth);
344       }
345     else if ((!strcmp(uri,"Prop")) || (!strcmp(uri,"Type")) ||
346             (!strcmp(uri,"Set")))
347       {
348         size_t len = strlen(about) + 1;
349         char *qabout = malloc (sizeof(char) * len * 2);
350         PQescapeString(qabout,about,len);
351         printf("INSERT INTO refSort VALUES ('%s', 'http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d, '%s');\n",qabout,depth,uri);
352       }
353     else
354       {
355         size_t len = strlen(about) + 1;
356         char *qabout = malloc (sizeof(char) * len * 2);
357         char *quri;
358         PQescapeString(qabout,about,len);
359         len = strlen(uri) + 1;
360         quri = malloc (sizeof(char) * len * 2);
361         PQescapeString(quri,uri,len);
362         printf("INSERT INTO refObj VALUES ('%s', '%s','http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion', %d);\n",qabout,quri,depth); 
363       }
364 }
365
366 // dome: cambiata per usare il modello con position
367 void print_one(about,uri,pos)
368      char    *about,
369              *uri;
370      int     pos;
371 {
372     char       *position = (char *)malloc((sizeof('a')*20));
373     size_t len = strlen(about) + 1;
374     char *qabout = malloc (sizeof(char) * len * 2);
375     char *quri;
376     PQescapeString(qabout,about,len);
377     len = strlen(uri) + 1;
378     quri = malloc (sizeof(char) * len * 2);
379     PQescapeString(quri,uri,len);
380     if (pos == INBODY)
381        position="InBody";
382     else if (pos == MAINHYP)
383        position="MainHypothesis";  /* This should never happen */
384     else if (pos == INHYP)
385        position="InHypothesis";
386     else if (pos == INCONCL)
387        position="InConclusion";
388     else if (pos == MAINCONCL)
389        position="MainConclusion";  /* This should never happen */
390     printf("INSERT INTO refObj VALUES ('%s', '%s', \
391     'http://www.cs.unibo.it/helm/schemas/schema-helm#%s', NULL);\n",qabout,quri,position);
392 }
393
394  /* The following function allocates a bucket for an identifier. */
395 void allocate_bucket(st, id, where)
396         struct st_bucket
397                         **st;
398                                         /* pointer to the bucket to be */
399                                         /* allocated */
400         char            *id;
401                                         /* identifier */
402         int             where;
403 {
404         int i;
405
406         *st = (struct st_bucket *)malloc(sizeof(struct st_bucket));
407         (*st)->id = (char *)malloc(sizeof('a')*(strlen(id) + 1));
408         strcpy((*st)->id,id);
409         (*st)->main_depth = 0;
410         (*st)->depths = NULL;
411         (*st)->next_st_bucket = NULL;
412         (*st)->all_next = all;
413         all = *st;
414         for (i = 0; i < 5; ++i)
415           (*st)->pos[i] = 0;
416         (*st)->pos[where] = 1;
417 }
418
419  /* The following function moves a bucket to the head of the */
420  /* list in which it lies. */
421 void move_bucket(st, dict_index)
422         struct st_bucket 
423                         *st;
424                                         /* pointer to the bucket to */
425                                         /* be moved */
426         int             dict_index;
427                                         /* index corresponding to */
428                                         /* the list in which the */
429                                         /* bucket lies */
430 {
431         st->next_st_bucket = dictionary[dict_index];
432         dictionary[dict_index] = st;
433 }
434
435  /* The following function implements Weinberger's hash function. */
436 int
437 hash_pjw(id)
438         char            *id;
439                                         /* identifier to be hashed */
440 {
441         unsigned        h,
442                         g;
443
444         for (h = 0; *id != EOS; id++)
445         {
446                 h = (h << HASH1) + (*id);
447                 if ((g = h) & HASH2)
448                         h = h ^ (g >> HASH3) ^ g;
449         }
450         return(h % DICTSIZE);
451 }
452
453
454
455
456
457