]> matita.cs.unibo.it Git - helm.git/blob - helm/metadata/create_V7_mowgli/METADATA/meta_lex.l
* new version of metadata extraction
[helm.git] / helm / metadata / create_V7_mowgli / METADATA / meta_lex.l
1  /******************************************************************/
2  /*  Copyright (C) 2000, HELM Team                                 */ 
3  /*                                                                */
4  /* This file is part of HELM, an Hypertextual, Electronic         */
5  /* Library of Mathematics, developed at the Computer Science      */
6  /* Department, University of Bologna, Italy.                      */
7  /*                                                                */
8  /* HELM is free software; you can redistribute it and/or          */
9  /* modify it under the terms of the GNU General Public License    */
10  /* as published by the Free Software Foundation; either version   */
11  /* 2 of the License, or (at your option) any later version.       */
12  /*                                                                */
13  /* HELM is distributed in the hope that it will be useful,        */
14  /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15  /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   */
16  /* GNU General Public License for more details.                   */
17  /*                                                                */
18  /* You should have received a copy of the GNU General Public      */
19  /* License along with HELM; if not, write to the Free Software    */
20  /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,         */
21  /* MA  02111-1307, USA.                                           */
22  /*                                                                */
23  /* For details, see the HELM World-Wide-Web page,                 */
24  /* http://cs.unibo.it/helm/.                                      */
25  /******************************************************************/
26
27  /***************************************************************/
28  /*                        META_LEXAN                           */
29  /*                 Automatic Metadata Extractor                */
30  /*           First draft 11/12/2001, by Andrea Asperti         */
31  /*      more bugs added by domenico lordi on mon 12/17/2001    */
32  /***************************************************************/
33
34  /***************************************************************/
35  /* 1. Inclusion of header files.                               */
36  /***************************************************************/
37
38 %{
39 #include                <string.h>
40 #include                <stdlib.h>
41 #include                <sys/stat.h>
42 #include                <postgresql/libpq-fe.h>
43 #include                "sthandler.h"
44 %}
45
46  /***************************************************************/
47  /* 2. Constants and Variables Definitions                      */
48  /***************************************************************/
49
50 %{
/* Kind of element whose attributes are being read (values of `where`). */
#define NOWHERE       0
#define CONST         1
#define MUTIND        2
#define MUTCONSTRUCT  3
#define SORT          4

/* Location of the scanner inside the term (values of `position`);
   NOTFOUND is the initial value of `found`. */
#define INBODY        0
#define MAINHYP       1
#define INHYP         2
#define INCONCL       3
#define MAINCONCL     4
#define INTYPE        5
#define NOTFOUND      6

/* Values of `first_child`. */
#define HERE          0
#define AFTER         1

/* Scanner state shared by all the rules. */
int   where          = NOWHERE;
int   found          = NOTFOUND;  /* last result stored by search() */
int   position       = INBODY;
int   first_child    = HERE;
int   skip           = 0;         /* boolean: skip the insertion of a URI */
int   no_open_source = 0;
int   spine_depth    = 0;
int   depth          = 0;

/* Scratch variables used inside the rule actions. */
int   tmp_n;
char  sep            = '"';
char *xpointer       = "#xpointer(1/";
char *uri;
char *tmp;
82 %}
83
84  /***************************************************************/
85  /* 3. Regular definitions.                                     */
86  /***************************************************************/
87
88 uri                     [^"]+
89 digits                  [0-9]+ 
90 value                   [^"]+                  
91
92  /***************************************************************/
93  /* 4. Rules.                                                   */
94  /***************************************************************/
95
96
97 %%
98
"<Variable"[^>]*">"(" "|\n)*"<body" {
                     /* Variables have both a body and a type: the body
                        has just been opened. */
                     position = INBODY;
                   }
102
"</body>"(" "|\n)*"<type" {
                     /* The body of a Variable is closed and its type is
                        opened: restart the type traversal from scratch. */
                     depth = 0;
                     spine_depth = 0;
                     no_open_source = 0;
                     first_child = HERE;
                     position = INTYPE;
                   }
110
"<decl"            |
"<def"             {
                     /* An opening binder moves the scanner one level
                        deeper: spine -> main hypothesis -> inner
                        hypothesis.  no_open_source counts the binders
                        currently open inside a main hypothesis; the
                        closing rule decrements it on every close seen in
                        INHYP, so every open seen in INHYP must be
                        counted too, otherwise a binder nested twice
                        would end the hypothesis too early. */
                     if (position == INTYPE)
                       position = MAINHYP;
                     else if (position == MAINHYP)
                       {
                         position = INHYP;
                         no_open_source++;
                       }
                     else if (position == INHYP)
                       no_open_source++;
                   }
119
"</decl>"          |
"</def>"           {
                     /* A closing binder undoes one level of nesting. */
                     switch (position)
                       {
                       case INHYP:
                         /* Leave the inner hypothesis only when its last
                            open binder has been closed. */
                         if (--no_open_source == 0)
                           {
                             first_child = HERE;
                             depth++;
                             position = MAINHYP;
                           }
                         break;
                       case MAINHYP:
                         /* Back on the spine, one binder further down. */
                         first_child = HERE;
                         depth = 0;
                         spine_depth++;
                         position = INTYPE;
                         break;
                       default:
                         /* Original author's open question: should
                            first_child be reset to HERE here as well? */
                         break;
                       }
                   }
141
142
.|\n               {
                     /* Consume the character without echoing it. */
                   }
145
"<LAMBDA"          |
"<MUTCASE"         |
"<FIX"             |
"<COFIX"           {
                     /* After one of these elements nothing that follows
                        is treated as the first child any more. */
                     first_child = AFTER;
                   }
152
"<REL"             {
                     /* A REL is recorded only when it is the first child
                        of the spine or of a main hypothesis. */
                     int on_spine = (position == INTYPE);

                     if ((on_spine || position == MAINHYP) &&
                         first_child == HERE)
                       {
                         if (on_spine)
                           {
                             /* REL on the spine */
                             position = INCONCL;
                             search("Rel", first_child, position, spine_depth);
                           }
                         else
                           {
                             search("Rel", first_child, position, depth);
                           }
                         first_child = AFTER;
                       }
                   }
166
"<SORT"(" "|\n)+"value=\""{value}   {
                     /* A SORT is recorded only when it is the first child
                        of the spine or of a main hypothesis. */
                     if (((position == INTYPE) || (position == MAINHYP)) &&
                         (first_child == HERE))
                       {
                         /* The pattern guarantees that yytext contains
                            value=" ; the sort name is everything after
                            that first double quote. */
                         const char *sort_name = strchr(yytext, '"') + 1;
                         size_t      name_len  = strlen(sort_name);

                         /* Copy sized to the text actually matched (the
                            former fixed-size buffer could be overrun by
                            a long value). */
                         tmp = malloc(name_len + 1);
                         if (tmp == NULL)
                           {
                             fprintf(stderr, "meta_lex: out of memory\n");
                             exit(EXIT_FAILURE);
                           }
                         memcpy(tmp, sort_name, name_len + 1);

                         if (position == INTYPE) /* SORT on the spine */
                           {
                             position = INCONCL;
                             search(tmp, first_child, position, spine_depth);
                           }
                         else
                           search(tmp, first_child, position, depth);
                         first_child = AFTER;
                         /* NOTE(review): as before, the copy is not freed
                            here; confirm whether search_bucket() keeps
                            the pointer before releasing it. */
                       }
                   }
183
"<VAR"             {
                     /* Flag the next uri= attribute to be ignored. */
                     first_child = AFTER;
                     skip = 1;
                   }
188
"<CONST"           {
                     /* Remember the element kind for the attribute rules;
                        a CONST met on the spine moves to the conclusion. */
                     where = CONST;
                     if (position == INTYPE)
                       {
                         position = INCONCL;
                       }
                   }
194
"<MUTIND"          {
                     /* Remember the element kind for the attribute rules;
                        a MUTIND met on the spine moves to the conclusion. */
                     where = MUTIND;
                     if (position == INTYPE)
                       {
                         position = INCONCL;
                       }
                   }
200
"<MUTCONSTRUCT"    {
                     /* Remember the element kind for the attribute rules;
                        a MUTCONSTRUCT met on the spine moves to the
                        conclusion. */
                     where = MUTCONSTRUCT;
                     if (position == INTYPE)
                       {
                         position = INCONCL;
                       }
                   }
206
"uri=\""{uri}      {
                     if (skip)
                       {
                         /* This uri= was flagged to be ignored; re-arm. */
                         skip = 0;
                       }
                     else
                       {
                         /* The URI is everything after the opening quote
                            of the attribute. */
                         const char *uri_text = strchr(yytext, '"') + 1;
                         size_t      uri_len  = strlen(uri_text);
                         /* Room for the URI plus the "#xpointer(1/n/m)"
                            suffix appended by the noType=/noConstr=
                            rules; never smaller than the buffer that
                            used to be allocated here. */
                         size_t      capacity = uri_len + 1 + 64;

                         if (capacity < 200 * sizeof(int))
                           capacity = 200 * sizeof(int);

                         /* uri must point at the start of the allocation:
                            it is later handed to free(), here and in the
                            noType=/noConstr= rules. */
                         uri = malloc(capacity);
                         if (uri == NULL)
                           {
                             fprintf(stderr, "meta_lex: out of memory\n");
                             exit(EXIT_FAILURE);
                           }
                         memcpy(uri, uri_text, uri_len + 1);

                         if (where == CONST)
                           {
                             /* A constant is complete as soon as its URI
                                is known. */
                             if (position == INCONCL)
                               search(uri, first_child, position, spine_depth);
                             else
                               search(uri, first_child, position, depth);
                             where = NOWHERE;
                             first_child = AFTER;
                             free(uri);
                             uri = NULL;
                           }
                         /* Otherwise uri stays allocated so that the
                            noType=/noConstr= rules can extend it. */
                       }
                   }
223
"noType=\""{digits} {
                     if ((where == MUTIND) || (where == MUTCONSTRUCT))
                       {
                         /* Large enough for any int printed with %d. */
                         char type_index[32];

                         /* The digits start right after the opening quote;
                            the index appended to the URI is that number
                            plus one. */
                         tmp_n = atoi(strchr(yytext, '"') + 1) + 1;
                         snprintf(type_index, sizeof type_index, "%d", tmp_n);

                         /* NOTE(review): uri is assumed to have been set
                            by the uri= rule with room for this suffix;
                            its capacity cannot be checked from here. */
                         strcat(uri, "#xpointer(1/");
                         strcat(uri, type_index);
                       }
                     if (where == MUTIND)
                       {
                         /* A MUTIND is complete once its type index is
                            known; a MUTCONSTRUCT still waits for
                            noConstr=. */
                         strcat(uri, ")");
                         if (position == INCONCL)
                           search(uri, first_child, position, spine_depth);
                         else
                           search(uri, first_child, position, depth);
                         free(uri);
                         where = NOWHERE;
                         first_child = AFTER;
                       }
                   }
245
"noConstr=\""{digits} {
                     if (where == MUTCONSTRUCT)
                       {
                         /* The constructor number is the run of digits
                            right after the opening quote; yytext itself
                            is left untouched. */
                         const char *constr_index = strchr(yytext, '"') + 1;

                         /* NOTE(review): uri is assumed to have been set
                            by the uri= rule and extended by noType=; its
                            capacity cannot be checked from here. */
                         strcat(uri, "/");
                         strcat(uri, constr_index);
                         strcat(uri, ")");
                         if (position == INCONCL)
                           search(uri, first_child, position, spine_depth);
                         else
                           search(uri, first_child, position, depth);
                         free(uri);
                         where = NOWHERE;
                         first_child = AFTER;
                       }
                   }
262
263
264
265 %%
266
267  /***************************************************************/
268  /* 6. Auxiliary functions.                                     */
269  /***************************************************************/
270
271 main(int argc, char *argv[])
272 {                  
273     struct stat buf;
274
275     char       *pghost,
276                *pgport,
277                *pgoptions,
278                *pgtty;
279     char       *dbName;
280
281     /* FILE *debug; */
282
283     PGconn     *conn;
284     PGresult   *res;
285
286     /*
287      * begin, by setting the parameters for a backend connection if the
288      * parameters are null, then the system will try to use reasonable
289      * defaults by looking up environment variables or, failing that,
290      * using hardwired constants
291      */
292
293     /* make a connection to the database */
294     conn = PQconnectdb("user=helm dbname=mowgli");
295
296     /*
297      * check to see that the backend connection was successfully made
298      */
299     if (PQstatus(conn) == CONNECTION_BAD)
300     {
301         fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
302         fprintf(stderr, "%s", PQerrorMessage(conn));
303         exit_nicely(conn);
304     }
305
306     /* debug = fopen("/tmp/trace.out","w"); */
307     /* PQtrace(conn, debug);  */
308
309     /* initialize the symbol table */
310     init_symbol_table();
311
312     // We process the body
313     if (!stat("tmp/body.xml",&buf)) 
314     {
315         yyin = fopen("tmp/body.xml", "r");
316         position = INBODY;
317         yylex();
318         fclose(yyin);
319      }
320
321     // We process the type
322     yyin = fopen("tmp/type.xml", "r");
323     position = INTYPE;
324     first_child = HERE;
325     no_open_source = 0;
326     spine_depth = 0;
327     depth = 0;
328     yylex(); 
329     fclose(yyin);
330     print_all(argv[1],conn);
331
332
333
334 search(uri,first_child,position,depth)
335 char               *uri;
336 int                first_child;
337 int                position; 
338 {                  
339                    if (position == MAINHYP)
340                       { 
341                        if (first_child == HERE) 
342                            found = search_bucket(uri,MAINHYP,depth);
343                        else 
344                            found = search_bucket(uri,INHYP,0);
345                       }
346                    else if (position == INCONCL)
347                       { 
348                        if (first_child == HERE) 
349                            found = search_bucket(uri,MAINCONCL,depth);
350                        else
351                            found = search_bucket(uri,INCONCL,0);
352                       }
353                         
354                    else 
355                       found = search_bucket(uri,position,depth);
356                    /*
357                    if (found == NOTFOUND)
358                          fprintf(stderr,"here = %d, pos = %d, uri = %s\n", first_child,position, uri); */
359
360 /*                  
361                       (first_child == HERE) 
362                       {
363                        if (position == MAINHYP)
364                           found = search_bucket(uri,MAINHYP,depth);
365                        else if (position == INCONCL)
366                           found = search_bucket(uri,MAINCONCL,0);
367                        else if (position == INHYP)
368                           found = search_bucket(uri,INHYP,0);
369                           if (found == NOTFOUND)
370                           printf( "pos = %d, uri = %s\n", MAINCONCL, uri); 
371                        }
372                    else if ((position == MAINHYP) && (first_child == AFTER))
373                         found = search_bucket(uri,INHYP,0);
374                    else found = search_bucket(uri,position,0);
375                    if (found == NOTFOUND)
376                          printf( "pos = %d, uri = %s\n", position, uri); 
377                    } */
378
/* Called by the scanner at end of input; returning 1 tells it that there
   is no further input to continue with. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
382
383
384
385