]> matita.cs.unibo.it Git - helm.git/blob - helm/metadata/create_V7_mowgli/METADATA/meta_lex.l
df0977f6caba90f8db1c620ff77a1f8f437f0a44
[helm.git] / helm / metadata / create_V7_mowgli / METADATA / meta_lex.l
1  /******************************************************************/
2  /*  Copyright (C) 2000, HELM Team                                 */ 
3  /*                                                                */
4  /* This file is part of HELM, an Hypertextual, Electronic         */
5  /* Library of Mathematics, developed at the Computer Science      */
6  /* Department, University of Bologna, Italy.                      */
7  /*                                                                */
8  /* HELM is free software; you can redistribute it and/or          */
9  /* modify it under the terms of the GNU General Public License    */
10  /* as published by the Free Software Foundation; either version   */
11  /* 2 of the License, or (at your option) any later version.       */
12  /*                                                                */
13  /* HELM is distributed in the hope that it will be useful,        */
14  /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15  /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   */
16  /* GNU General Public License for more details.                   */
17  /*                                                                */
18  /* You should have received a copy of the GNU General Public      */
19  /* License along with HELM; if not, write to the Free Software    */
20  /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,         */
21  /* MA  02111-1307, USA.                                           */
22  /*                                                                */
23  /* For details, see the HELM World-Wide-Web page,                 */
24  /* http://cs.unibo.it/helm/.                                      */
25  /******************************************************************/
26
27  /***************************************************************/
28  /*                        META_LEXAN                           */
29  /*                 Automatic Metadata Extractor                */
30  /*           First draft 11/12/2001, by Andrea Asperti         */
31  /*      more bugs added by domenico lordi on mon 12/17/2001    */
32  /***************************************************************/
33
34  /***************************************************************/
35  /* 1. Inclusion of header files.                               */
36  /***************************************************************/
37
38 %{
39 #include                <string.h>
40 #include                <stdlib.h>
41 #include                <sys/stat.h>
42 #include                "sthandler.h"
43 %}
44
45  /***************************************************************/
46  /* 2. Constants and Variables Definitions                      */
47  /***************************************************************/
48
%{
/* Kind of element whose attributes are being scanned (values of `where`). */
#define                 NOWHERE   0
#define                 CONST     1
#define                 MUTIND    2
#define                 MUTCONSTRUCT  3
#define                 SORT      4

/* Position codes (values of `position` and `found`, also passed to search_bucket). */
#define                 INBODY    0
#define                 MAINHYP   1
#define                 INHYP     2
#define                 INCONCL   3
#define                 MAINCONCL 4
#define                 INTYPE    5
#define                 NOTFOUND  6

/* Values of `first_child`. */
#define                 HERE      0
#define                 AFTER     1

/*
 * search() is defined in the user-code section at the end of this file but
 * is called from the rule actions, i.e. before its definition.  Declare it
 * here so the calls do not rely on an implicit function declaration.
 */
int                     search(char *uri, int first_child, int position, int depth);

int                     where = NOWHERE;       /* element kind awaiting its uri/noType/noConstr attributes */
int                     found = NOTFOUND;      /* result of the last search_bucket() call made by search() */
int                     position = INBODY;     /* current position code */
int                     first_child = HERE;    /* HERE until a term has been seen at the current position, then AFTER */
int                     skip = 0;              /* boolean to skip the insertion of a URI (set by the <VAR rule) */
int                     no_open_source = 0;    /* count of <decl>/<def> elements opened while inside a main hypothesis */
int                     spine_depth = 0;       /* incremented each time a main hypothesis is closed */
int                     depth = 0;             /* incremented when a nested hypothesis closes; reset per main hypothesis */
int                     tmp_n;                 /* scratch: noType value plus one */
char                    sep = '"';             /* the double-quote character that opens attribute values */
char                    *xpointer = "#xpointer(1/";  /* not referenced by the rules visible in this file */
char                    *uri;                  /* URI being assembled by the uri/noType/noConstr rules */
char                    *tmp;                  /* scratch string used by the rule actions */
%}
82
83  /***************************************************************/
84  /* 3. Regular definitions.                                     */
85  /***************************************************************/
86
87 uri                     [^"]+
88 digits                  [0-9]+ 
89 value                   [^"]+                  
90
91  /***************************************************************/
92  /* 4. Rules.                                                   */
93  /***************************************************************/
94
95
96 %%
97
"<Variable"[^>]*">"(" "|\n)*"<body" {
                    /* Variables have both a body and a type: the body is scanned first. */
                    position = INBODY;
                   }

"</body>"(" "|\n)*"<type" {
                    /* Variables have both a body and a type: the body is closed, the type begins. */
                    position = INTYPE;
                   }
105
"<decl"            |
"<def"             {
                    /*
                     * Opening <decl>/<def>.  From INTYPE it starts a main
                     * hypothesis; from MAINHYP it starts a nested one, which
                     * is counted in no_open_source.  Any other position is
                     * left untouched.
                     */
                    switch (position)
                      {
                      case INTYPE:
                        position = MAINHYP;
                        break;
                      case MAINHYP:
                        position = INHYP;
                        no_open_source++;
                        break;
                      default:
                        break;
                      }
                   }
114
"</decl>"          |
"</def"            {
                    /*
                     * Closing </decl> or </def.  Closing a main hypothesis
                     * returns to INTYPE, advances spine_depth and restarts
                     * depth.  Closing the last open nested hypothesis
                     * returns to MAINHYP and advances depth.
                     */
                    if (position == MAINHYP)
                      {
                       position = INTYPE;
                       spine_depth++;
                       depth = 0;
                      }
                    else if (position == INHYP)
                      {
                       no_open_source--;
                       if (no_open_source == 0)
                         {
                          position = MAINHYP;
                          depth++;
                         }
                      }
                    /* In every case the next term seen is a first child again. */
                    first_child = HERE;
                   }
135
136
.|\n               {
                    /* Any character not claimed by a longer rule is discarded. */
                   }
139
"<LAMBDA"          |
"<MUTCASE"         |
"<FIX"             |
"<COFIX"           {
                    /* Once one of these elements has been seen, later terms are no longer first children. */
                    first_child = AFTER;
                   }
146
"<REL"             {
                    /*
                     * A REL is recorded (under the name "Rel") only when it
                     * is the first term seen at INTYPE or MAINHYP.
                     */
                    if ((first_child == HERE) &&
                        ((position == INTYPE) || (position == MAINHYP)))
                     {
                       int rel_depth = depth;

                       if (position == INTYPE) /* REL on the spine */
                         {
                           position = INCONCL;
                           rel_depth = spine_depth;
                         }
                       search("Rel",first_child,position,rel_depth);
                       first_child = AFTER;
                     }
                   }
160
"<SORT"(" "|\n)+"value=\""{value}   {
                    /*
                     * A SORT seen at INTYPE or MAINHYP is recorded under the
                     * text of its value attribute.
                     *
                     * The value is located with strchr() instead of strsep():
                     * the delimiter previously handed to strsep() was the
                     * address of a single char, which is not a NUL-terminated
                     * string.  The copy is sized from the matched text rather
                     * than a fixed buffer, so a long value cannot overflow it.
                     */
                    if ((position == INTYPE) || (position == MAINHYP))
                     {
                       /* The pattern guarantees a '"' right before the value. */
                       const char *sort_name = strchr(yytext, '"') + 1;

                       tmp = malloc(strlen(sort_name) + 1);
                       if (tmp == NULL)
                         {
                           fprintf(stderr, "out of memory\n");
                           exit(-1);
                         }
                       strcpy(tmp, sort_name);
                       if (position == INTYPE) /* SORT on the spine */
                         {
                           position = INCONCL;
                           search(tmp,first_child,position,spine_depth);
                         }
                       else search(tmp,first_child,position,depth);
                       /*
                        * NOTE(review): tmp is deliberately not freed here, as
                        * before; whether search_bucket() keeps the pointer is
                        * not visible from this file -- confirm before freeing.
                        */
                       first_child = AFTER;
                     }
                   }
176
"<VAR"             {
                     /* The uri attribute that follows a VAR is not recorded: the uri rule tests and clears skip. */
                     first_child = AFTER;
                     skip = 1;
                   }
181
"<CONST"           {
                     /* Remember the element kind; its uri attribute is handled by the uri rule. */
                     where = CONST;
                     if (position == INTYPE) /* CONST on the spine */
                        position = INCONCL;
                   }

"<MUTIND"          {
                     /* Remember the element kind; the URI is completed by the noType rule. */
                     where = MUTIND;
                     if (position == INTYPE) /* MUTIND on the spine */
                        position = INCONCL;
                   }

"<MUTCONSTRUCT"    {
                     /* Remember the element kind; the URI is completed by the noType and noConstr rules. */
                     where = MUTCONSTRUCT;
                     if (position == INTYPE) /* MUTCONSTRUCT on the spine */
                        position = INCONCL;
                   }
199
"uri=\""{uri}      {
                         /*
                          * Capture the text of a uri attribute unless the
                          * preceding element asked for it to be skipped.
                          *
                          * For a CONST the URI is recorded immediately.  For
                          * MUTIND / MUTCONSTRUCT it is kept in `uri` so that
                          * the noType / noConstr rules can append the
                          * "#xpointer(1/N[/M])" suffix to it.
                          *
                          * The URI is copied to the START of the allocation
                          * (found with strchr(), not strsep()), so `uri` is
                          * always the pointer returned by malloc() and can be
                          * passed to free().  The buffer is sized from the
                          * matched text plus room for the suffix.
                          */
                         if (!skip) {
                            /* Room reserved for the suffix appended by the noType/noConstr rules. */
                            enum { URI_SUFFIX_ROOM = 64 };
                            /* The pattern guarantees a '"' right before the URI text. */
                            const char *uri_text = strchr(yytext, '"') + 1;

                            uri = malloc(strlen(uri_text) + 1 + URI_SUFFIX_ROOM);
                            if (uri == NULL)
                              {
                                fprintf(stderr, "out of memory\n");
                                exit(-1);
                              }
                            strcpy(uri, uri_text);
                            if (where == CONST)
                              {
                                if (position == INCONCL)
                                  search(uri,first_child,position,spine_depth);
                                else search(uri,first_child,position,depth);
                                where = NOWHERE;
                                first_child = AFTER;
                                free(uri);
                                uri = NULL;
                              }
                            else if ((where != MUTIND) && (where != MUTCONSTRUCT))
                              {
                                /* No rule in this file consumes the URI in this state: do not leak it. */
                                free(uri);
                                uri = NULL;
                              }
                         } else skip = 0;
                   }
216
"noType=\""{digits} {
                         /*
                          * Append "#xpointer(1/<noType+1>" to the pending URI
                          * of a MUTIND or MUTCONSTRUCT.  For a MUTIND the
                          * suffix is closed with ")" and the URI is recorded
                          * and released; for a MUTCONSTRUCT the noConstr rule
                          * finishes the job.
                          *
                          * The incremented index is formatted into a local
                          * buffer: writing it back into a buffer sized for
                          * the original digits overflowed whenever the
                          * increment added a digit ("9" -> "10").  yytext is
                          * only read, never advanced or modified.
                          *
                          * NOTE(review): the capacity of `uri` is decided by
                          * the rule that allocated it; it must leave room for
                          * the suffix appended here.
                          */
                         if ((where == MUTIND) || (where == MUTCONSTRUCT))
                           {
                             char index_text[32];

                             /* The pattern guarantees a '"' right before the digits. */
                             tmp_n = atoi(strchr(yytext, '"') + 1) + 1;
                             snprintf(index_text, sizeof index_text, "%d", tmp_n);
                             strcat(uri,"#xpointer(1/");
                             strcat(uri,index_text);
                           }
                         if (where == MUTIND)
                           {
                             strcat(uri,")");
                             if (position == INCONCL)
                                search(uri,first_child,position,spine_depth);
                             else search(uri,first_child,position,depth);
                             free(uri);
                             uri = NULL;
                             where = NOWHERE;
                             first_child = AFTER;
                           }
                   }
238
"noConstr=\""{digits} {
                         /*
                          * Finish the URI of a MUTCONSTRUCT by appending
                          * "/<noConstr>)", then record and release it.
                          *
                          * The digits are appended straight from yytext
                          * (located with strchr()); yytext itself is no
                          * longer advanced or modified, and no intermediate
                          * heap copy is needed.
                          *
                          * NOTE(review): the capacity of `uri` is decided by
                          * the rule that allocated it; it must leave room for
                          * the text appended here.
                          */
                         if (where == MUTCONSTRUCT)
                           {
                             strcat(uri,"/");
                             /* The pattern guarantees a '"' right before the digits. */
                             strcat(uri,strchr(yytext, '"') + 1);
                             strcat(uri,")");
                             if (position == INCONCL)
                               search(uri,first_child,position,spine_depth);
                             else search(uri,first_child,position,depth);
                             free(uri);
                             uri = NULL;
                             where = NOWHERE;
                             first_child = AFTER;
                           }
                   }
255
256
257
258 %%
259
260  /***************************************************************/
261  /* 6. Auxiliary functions.                                     */
262  /***************************************************************/
263
264 main(int argc, char *argv[])
265 {                  
266                    struct stat buf;
267                    FILE *outrel, *outsort;
268
269                    init_symbol_table();
270                    if (!(outrel = fopen("forward_rel.rdf","a"))) 
271                      {
272                       fprintf(stderr, "error in openinf file forward_rel.rdf\n");
273                       exit(-1);
274                      }
275                    if (!(outsort = fopen("forward_sort.rdf","a"))) 
276                      {
277                       fprintf(stderr, "error in openinf file forward_rel.rdf\n");
278                       exit(-1);
279                      }
280                    // We process the body
281                    if (!stat("tmp/body.xml",&buf)) {
282                       yyin = fopen("tmp/body.xml", "r");
283                       position = INBODY;
284                       yylex();
285                       fclose(yyin);
286                    }
287
288                    // We process the type
289                    yyin = fopen("tmp/type.xml", "r");
290                    position = INTYPE;
291                    first_child = HERE;
292                    no_open_source = 0;
293                    spine_depth = 0;
294                    depth = 0;
295                    yylex(); 
296
297                    printf("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\n");
298 printf("<!DOCTYPE rdf:RDF [
299         <!ENTITY rdfns 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
300         <!ENTITY hthns 'http://www.cs.unibo.it/helm/schemas/schema-helmth#'>
301         <!ENTITY hns 'http://www.cs.unibo.it/helm/schemas/schema-helm#'>
302
303    ]>\n");
304
305                    printf("<rdf:RDF xml:lang=\"en\" xmlns:rdf=\"&rdfns;\" xmlns:h=\"&hns;\" xmlns:hth=\"&hthns;\">\n");
306                    printf("\t<h:Object rdf:about=\"");
307                    printf("%s",argv[1]);
308                    printf("\">\n");
309                    print_all(argv[1],outrel,outsort);
310                    printf("\t</h:Object>\n");
311                    printf("</rdf:RDF>\n");
312                    fclose(yyin);
313                    } 
314
315 search(uri,first_child,position,depth)
316 char               *uri;
317 int                first_child;
318 int                position; 
319 {                  
320                    if (position == MAINHYP)
321                       { 
322                        if (first_child == HERE) 
323                            found = search_bucket(uri,MAINHYP,depth);
324                        else 
325                            found = search_bucket(uri,INHYP,0);
326                       }
327                    else if (position == INCONCL)
328                       { 
329                        if (first_child == HERE) 
330                            found = search_bucket(uri,MAINCONCL,depth);
331                        else
332                            found = search_bucket(uri,INCONCL,0);
333                       }
334                         
335                    else 
336                       found = search_bucket(uri,position,depth);
337                    /* if (found == NOTFOUND)
338                          fprintf(stderr,"pos = %d, uri = %s\n", position, uri); */
339
340 /*                  
341                       (first_child == HERE) 
342                       {
343                        if (position == MAINHYP)
344                           found = search_bucket(uri,MAINHYP,depth);
345                        else if (position == INCONCL)
346                           found = search_bucket(uri,MAINCONCL,0);
347                        else if (position == INHYP)
348                           found = search_bucket(uri,INHYP,0);
349                           if (found == NOTFOUND)
350                           printf( "pos = %d, uri = %s\n", MAINCONCL, uri); 
351                        }
352                    else if ((position == MAINHYP) && (first_child == AFTER))
353                         found = search_bucket(uri,INHYP,0);
354                    else found = search_bucket(uri,position,0);
355                    if (found == NOTFOUND)
356                          printf( "pos = %d, uri = %s\n", position, uri); 
357                    } */
358
/* yywrap: always returns 1. */
int yywrap()
{
    return 1;
}
362