]> matita.cs.unibo.it Git - helm.git/blob - helm/metadata/extractor/meta_lex.l
ocaml 3.09 transition
[helm.git] / helm / metadata / extractor / meta_lex.l
1  /******************************************************************/
2  /*  Copyright (C) 2000, HELM Team                                 */ 
3  /*                                                                */
4  /* This file is part of HELM, an Hypertextual, Electronic         */
5  /* Library of Mathematics, developed at the Computer Science      */
6  /* Department, University of Bologna, Italy.                      */
7  /*                                                                */
8  /* HELM is free software; you can redistribute it and/or          */
9  /* modify it under the terms of the GNU General Public License    */
10  /* as published by the Free Software Foundation; either version   */
11  /* 2 of the License, or (at your option) any later version.       */
12  /*                                                                */
13  /* HELM is distributed in the hope that it will be useful,        */
14  /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15  /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   */
16  /* GNU General Public License for more details.                   */
17  /*                                                                */
18  /* You should have received a copy of the GNU General Public      */
19  /* License along with HELM; if not, write to the Free Software    */
20  /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,         */
21  /* MA  02111-1307, USA.                                           */
22  /*                                                                */
23  /* For details, see the HELM World-Wide-Web page,                 */
24  /* http://cs.unibo.it/helm/.                                      */
25  /******************************************************************/
26
27  /***************************************************************/
28  /*                        META_LEXAN                           */
29  /*                 Automatic Metadata Extractor                */
30  /*           First draft 11/12/2001, by Andrea Asperti         */
31  /*      more bugs added by domenico lordi on mon 12/17/2001    */
32  /***************************************************************/
33
34  /***************************************************************/
35  /* 1. Inclusion of header files.                               */
36  /***************************************************************/
37
38 %{
39 #include                <string.h>
40 #include                <stdlib.h>
41 #include                <sys/stat.h>
42 #include                "sthandler.h"
43 %}
44
45  /***************************************************************/
46  /* 2. Constants and Variables Definitions                      */
47  /***************************************************************/
48
49 %{
50 #define                 NOWHERE   0
51 #define                 CONST     1
52 #define                 MUTIND    2
53 #define                 MUTCONSTRUCT  3
54 #define                 SORT      4
55
56 #define                 INBODY    0
57 #define                 MAINHYP   1
58 #define                 INHYP     2
59 #define                 INCONCL   3
60 #define                 MAINCONCL 4
61 #define                 INTYPE    5
62 #define                 NOTFOUND  6
63
64 #define                 HERE      0     
65 #define                 AFTER     1
66
67
68 int                     where = NOWHERE;
69 int                     found = NOTFOUND;
70 int                     position = INBODY;
71 int                     first_child = HERE;
72 int                     skip = 0;     // boolean to skip the insertion of a URI
73 int                     no_open_source = 0;
74 int                     spine_depth = 0;
75 int                     depth = 0;
76 int                     tmp_n;
77 char                    sep = '"';
78 char                    *xpointer = "#xpointer(1/";
79 char                    *uri;
80 char                    *tmp;
81
82 void search(char *uri, int first_child, int position, int depth);
83 %}
84
85  /***************************************************************/
86  /* 3. Regular definitions.                                     */
87  /***************************************************************/
88
89 uri                     [^"]+
90 digits                  [0-9]+ 
91 value                   [^"]+                  
92
93  /***************************************************************/
94  /* 4. Rules.                                                   */
95  /***************************************************************/
96
97
98 %%
99
100 "<Variable"[^>]*">"(" "|\n)*"<body" {
101                     position = INBODY; // Variables have both a body and a type
102                    }
103
104 "</body>"(" "|\n)*"<type" {
105                     position = INTYPE; // Variables have both a body and a type
106                     first_child = HERE;
107                     no_open_source = 0;
108                     spine_depth = 0;
109                     depth = 0;   
110                    }
111
112 "<decl"            |
113 "<def"             {
114                     if (position == INTYPE)
115                        position = MAINHYP;
116                     else if (position == MAINHYP)
117                         { position = INHYP;
118                           no_open_source = 1;}
119                     else if (position == INHYP) no_open_source++;
120                    }
121
122 "</decl>"          |
123 "</def>"           {
124                     if (position == INHYP)
125                      {
126                       no_open_source--;
127                       if (no_open_source == 0) 
128                         {
129                          position = MAINHYP;
130                          depth++;
131                          first_child = HERE;
132                         }
133                      }
134                     else if (position == MAINHYP)
135                       {
136                        position = INTYPE;
137                        spine_depth++;
138                        depth = 0;
139                        first_child = HERE;
140                       }
141                     /* bug? first_child = HERE; */
142                    }
143
144
145 .|\n               {
146                    }
147
148 "<LAMBDA"          |
149 "<MUTCASE"         |
150 "<FIX"             |
151 "<COFIX"           { 
152                           first_child = AFTER;
153                    }
154
155 "<REL"             {
156                     if (((position == INTYPE) | (position == MAINHYP)) &&
157                        (first_child == HERE))
158                      {
159                        if (position == INTYPE) /* REL on the spine */
160                          {
161                            position = INCONCL;
162                            search("Rel",first_child,position,spine_depth);
163                          }
164                        else search("Rel",first_child,position,depth);
165                        first_child = AFTER;
166                      }
167                    }
168
169 "<SORT"(" "|\n)+"value=\""{value}   {         
170                     if (((position == INTYPE) | (position == MAINHYP)) &&
171                        (first_child == HERE))
172                      {
173                        tmp=(char *)malloc((sizeof('a')*200)); 
174                        strcpy(tmp,yytext);
175                        strsep(&tmp,&sep); 
176                        if (position == INTYPE) /* SORT on the spine */
177                          { 
178                            position = INCONCL;
179                            search(tmp,first_child,position,spine_depth);
180                          }
181                        else search(tmp,first_child,position,depth);
182                        first_child = AFTER;
183                      }
184                    }
185
186 "<VAR"             {
187                      skip = 1;
188                      first_child = AFTER;
189                    }
190
191 "<CONST"           { 
192                      if (position == INTYPE) /* CONST on the spine */
193                         position = INCONCL;
194                      where = CONST;
195                    }
196
197 "<MUTIND"          { 
198                      if (position == INTYPE) /* MUTIND on the spine */
199                         position = INCONCL;
200                      where = MUTIND;
201                    }
202
203 "<MUTCONSTRUCT"    { 
204                      if (position == INTYPE) /* MUTCONSTRUCT on the spine */
205                         position = INCONCL;
206                      where = MUTCONSTRUCT;
207                    }
208
209 "uri=\""{uri}      {     
210                          if (!skip) {
211                             uri=(char *)malloc((sizeof('a')*200)); 
212                             strcpy(uri,yytext);
213                             strsep(&uri,&sep);
214                             if (where == CONST)
215                               {
216                                 if (position == INCONCL)
217                                   search(uri,first_child,position,spine_depth);
218                                 else search(uri,first_child,position,depth);
219                                 where = NOWHERE;
220                                 first_child = AFTER;
221                                 free(uri); 
222                               };
223                          } else skip = 0;
224                    } 
225
226 "noType=\""{digits} {
227                          if ((where == MUTIND) || (where == MUTCONSTRUCT))
228                           { strsep(&yytext,&sep);
229                             tmp=(char *)malloc((sizeof(sep)*(strlen(yytext)+1)));
230                             strcpy(tmp,yytext);
231                             tmp_n = atoi(tmp)+1;
232                             sprintf(tmp,"%d",tmp_n);
233                             strcat(uri,"#xpointer(1/"); 
234                             strcat(uri,tmp); 
235                           };
236                          if (where == MUTIND) 
237                              { 
238                                strcat(uri,")");
239                                if (position == INCONCL)
240                                   search(uri,first_child,position,spine_depth);
241                                else search(uri,first_child,position,depth);
242                                free(uri);
243                                free(tmp);
244                                where = NOWHERE; 
245                                first_child = AFTER;};
246                    } 
247
248 "noConstr=\""{digits} {
249                          if (where == MUTCONSTRUCT)
250                           { strsep(&yytext,&sep);
251                             tmp=(char *)malloc((sizeof(sep)*(strlen(yytext)+1)));
252                             strcpy(tmp,yytext);
253                             strcat(uri,"/");
254                             strcat(uri,tmp);
255                             strcat(uri,")");
256                             if (position == INCONCL)
257                               search(uri,first_child,position,spine_depth);
258                             else search(uri,first_child,position,depth);
259                             free(uri);
260                             free(tmp);
261                             where = NOWHERE; 
262                             first_child = AFTER;};
263                    } 
264
265
266
267 %%
268
269  /***************************************************************/
270  /* 6. Auxiliary functions.                                     */
271  /***************************************************************/
272
273 int main(int argc, char *argv[])
274 {                  
275     struct stat buf;
276     char *name;
277     char *poss;
278     char *posd;
279
280     /* FILE *debug; */
281
282     if (argc != 4)
283     {
284         fprintf(stderr, "Usage: meta <object_uri> <body_file> <type_file>\n");
285         exit(1);
286     }
287
288
289     /* initialize the symbol table */
290     init_symbol_table();
291
292     // We process the body
293     if (!stat(argv[2],&buf)) 
294     {
295         yyin = fopen(argv[2], "r");
296         position = INBODY;
297         yylex();
298         fclose(yyin);
299      }
300
301     // We process the type
302     yyin = fopen(argv[3], "r");
303     position = INTYPE;
304     first_child = HERE;
305     no_open_source = 0;
306     spine_depth = 0;
307     depth = 0;
308     yylex(); 
309     fclose(yyin);
310     print_all(argv[1]);
311     poss = rindex(argv[1],'/');
312     posd = rindex(argv[1],'.');
313     name = (char *)malloc((posd - poss) * sizeof(char));
314     strncpy(name, poss + 1, posd - poss - 1);
315     name[posd - poss - 1] = '\0';
316     print_name(name,argv[1]);
317     free(name);
318
319     return 0;
320
321
322
323 void search(uri,first_child,position,depth)
324 char               *uri;
325 int                first_child;
326 int                position; 
327 {                  
328                    if (position == MAINHYP)
329                       { 
330                        if (first_child == HERE) 
331                            found = search_bucket(uri,MAINHYP,depth);
332                        else 
333                            found = search_bucket(uri,INHYP,0);
334                       }
335                    else if (position == INCONCL)
336                       { 
337                        if (first_child == HERE) 
338                            found = search_bucket(uri,MAINCONCL,depth);
339                        else
340                            found = search_bucket(uri,INCONCL,0);
341                       }
342                         
343                    else 
344                       found = search_bucket(uri,position,depth);
345                    /*
346                    if (found == NOTFOUND)
347                          fprintf(stderr,"here = %d, pos = %d, uri = %s\n", first_child,position, uri); */
348
349 /*                  
350                       (first_child == HERE) 
351                       {
352                        if (position == MAINHYP)
353                           found = search_bucket(uri,MAINHYP,depth);
354                        else if (position == INCONCL)
355                           found = search_bucket(uri,MAINCONCL,0);
356                        else if (position == INHYP)
357                           found = search_bucket(uri,INHYP,0);
358                           if (found == NOTFOUND)
359                           printf( "pos = %d, uri = %s\n", MAINCONCL, uri); 
360                        }
361                    else if ((position == MAINHYP) && (first_child == AFTER))
362                         found = search_bucket(uri,INHYP,0);
363                    else found = search_bucket(uri,position,0);
364                    if (found == NOTFOUND)
365                          printf( "pos = %d, uri = %s\n", position, uri); 
366                    } */
367
368 int yywrap() {
369                return 1;
370              }
371
372
373
374