]> matita.cs.unibo.it Git - helm.git/blob - helm/metadata/create_V7_mowgli/METADATA/meta_lex.l
New version for the new DTD.
[helm.git] / helm / metadata / create_V7_mowgli / METADATA / meta_lex.l
1  /******************************************************************/
2  /*  Copyright (C) 2000, HELM Team                                 */ 
3  /*                                                                */
4  /* This file is part of HELM, an Hypertextual, Electronic         */
5  /* Library of Mathematics, developed at the Computer Science      */
6  /* Department, University of Bologna, Italy.                      */
7  /*                                                                */
8  /* HELM is free software; you can redistribute it and/or          */
9  /* modify it under the terms of the GNU General Public License    */
10  /* as published by the Free Software Foundation; either version   */
11  /* 2 of the License, or (at your option) any later version.       */
12  /*                                                                */
13  /* HELM is distributed in the hope that it will be useful,        */
14  /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15  /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   */
16  /* GNU General Public License for more details.                   */
17  /*                                                                */
18  /* You should have received a copy of the GNU General Public      */
19  /* License along with HELM; if not, write to the Free Software    */
20  /* Foundation, Inc., 59 Temple Place - Suite 330, Boston,         */
21  /* MA  02111-1307, USA.                                           */
22  /*                                                                */
23  /* For details, see the HELM World-Wide-Web page,                 */
24  /* http://cs.unibo.it/helm/.                                      */
25  /******************************************************************/
26
27  /***************************************************************/
28  /*                        META_LEXAN                           */
29  /*                 Automatic Metadata Extractor                */
30  /*           First draft 11/12/2001, by Andrea Asperti         */
31  /*      more bugs added by domenico lordi on mon 12/17/2001    */
32  /***************************************************************/
33
34  /***************************************************************/
35  /* 1. Inclusion of header files.                               */
36  /***************************************************************/
37
38 %{
#include                <stdio.h>
#include                <stdlib.h>
#include                <string.h>
#include                <sys/stat.h>
#include                "sthandler.h"
43 %}
44
45  /***************************************************************/
46  /* 2. Constants and Variables Definitions                      */
47  /***************************************************************/
48
%{
/* Values of `where`: the kind of reference element whose attributes are
   currently being scanned (NOWHERE when none is pending). */
#define                 NOWHERE   0
#define                 CONST     1
#define                 MUTIND    2
#define                 MUTCONSTRUCT  3

/* Values of `position`, also handed to search_bucket() by search().
   MAINHYP and MAINCONCL are used by search() when first_child == HERE;
   NOTFOUND is the initial value of `found`. */
#define                 INBODY    0
#define                 MAINHYP   1
#define                 INHYP     2
#define                 INCONCL   3
#define                 MAINCONCL 4
#define                 INTYPE    5
#define                 NOTFOUND  6

/* Values of `first_child`. */
#define                 HERE      0
#define                 AFTER     1

/* search() is called from the rule actions before its definition in the
   user-code section, so it must be declared here: implicit function
   declarations are not valid since C99. */
int                     search(char *uri, int first_child, int position);

int                     where = NOWHERE;      /* pending reference element kind */
int                     found = NOTFOUND;     /* last search_bucket() result stored by search() */
int                     position = INBODY;    /* region currently being scanned */
int                     first_child = HERE;   /* HERE until a term node or reference has been seen */
int                     skip = 0;             /* boolean to skip the insertion of a URI */
int                     no_open_source = 0;   /* count of currently open <decl/<def elements */
int                     tmp_n;                /* scratch integer for the rule actions */
char                    sep = '"';            /* attribute-value delimiter (double quote) */
char                    *xpointer = "#xpointer(1/";
char                    *uri;                 /* URI string shared by the attribute rules */
char                    *tmp;                 /* scratch string pointer for the rule actions */
%}
79
80  /***************************************************************/
81  /* 3. Regular definitions.                                     */
82  /***************************************************************/
83
84 uri                     [^"]+
85 digits                  [0-9]+                   
86
87  /***************************************************************/
88  /* 4. Rules.                                                   */
89  /***************************************************************/
90
91
92 %%
93
"<Variable"[^>]*">"(" "|\n)*"<body" {
                     /* A Variable has both a body and a type: the body
                        comes first, so scanning starts in INBODY. */
                     position = INBODY;
                   }

"</body>"(" "|\n)*"<type" {
                     /* End of the Variable body, start of its type. */
                     position = INTYPE;
                   }
101
"<decl"            |
"<def"             {
                     /* Opening a <decl/<def while on the type spine or
                        inside a hypothesis: enter (or stay in) INHYP and
                        count one more open element. */
                     if (position == INTYPE || position == INHYP) {
                        position = INHYP;
                        no_open_source++;
                     }
                   }

"</decl>"          |
"</def"            {
                     /* Closing a <decl/<def while in INHYP: count one
                        fewer open element; once none is left open, return
                        to the type spine and make the next node the first
                        child again. */
                     if (position == INHYP && --no_open_source == 0) {
                        position = INTYPE;
                        first_child = HERE;
                     }
                   }
120
121
.|\n               {
                     /* Discard any single character that no longer rule
                        matches (lex prefers the longest match). */
                   }

"<LAMBDA"          |
"<REL"             |
"<MUTCASE"         |
"<FIX"             |
"<COFIX"           {
                     /* One of these nodes has been seen: whatever follows
                        is no longer the first child. */
                     first_child = AFTER;
                   }

"<VAR"             {
                     /* Same as above, and additionally flag the next
                        uri attribute to be skipped. */
                     first_child = AFTER;
                     skip = 1;
                   }
137
"<CONST"           {
                     /* A CONST met directly on the type spine is in the
                        conclusion. Remember the element kind for the
                        attribute rules. */
                     if (position == INTYPE) {
                        position = INCONCL;
                     }
                     where = CONST;
                   }

"<MUTIND"          {
                     /* Same handling as CONST, for MUTIND. */
                     if (position == INTYPE) {
                        position = INCONCL;
                     }
                     where = MUTIND;
                   }

"<MUTCONSTRUCT"    {
                     /* Same handling as CONST, for MUTCONSTRUCT. */
                     if (position == INTYPE) {
                        position = INCONCL;
                     }
                     where = MUTCONSTRUCT;
                   }
155
"uri=\""{uri}      {
                     /*
                      * A uri attribute: yytext holds the attribute name, the
                      * opening quote and the value (the closing quote is
                      * not part of the match).
                      *
                      *  - If `skip` is set, the attribute is ignored and
                      *    the flag is cleared.
                      *  - If no reference element is pending (where ==
                      *    NOWHERE), nothing would consume the value, so
                      *    no buffer is allocated.
                      *  - For CONST the value is passed to search() at
                      *    once and released.
                      *  - For MUTIND / MUTCONSTRUCT the value is kept in
                      *    `uri` so that later attribute rules can append
                      *    to it and release it.
                      *
                      * `uri` always points at the start of the malloc'ed
                      * buffer, so a later free(uri) is valid. The buffer
                      * is sized from the actual value plus SUFFIX_ROOM
                      * spare bytes for text appended by later rules.
                      */
                     static const char attr[] = "uri=\"";

                     if (skip) {
                        skip = 0;
                     } else if (where != NOWHERE) {
                        enum { SUFFIX_ROOM = 64 };
                        const char *value = yytext + (sizeof attr - 1);
                        size_t      len   = strlen(value);

                        uri = malloc(len + SUFFIX_ROOM);
                        if (uri == NULL) {
                           perror("malloc");
                           exit(EXIT_FAILURE);
                        }
                        memcpy(uri, value, len + 1);

                        if (where == CONST) {
                           search(uri, first_child, position);
                           where = NOWHERE;
                           first_child = AFTER;
                           free(uri);
                           uri = NULL;
                        }
                     }
                   }
170
"noType=\""{digits} {
                     /*
                      * A noType attribute: yytext holds the attribute name,
                      * the opening quote and the decimal index.
                      *
                      * For MUTIND and MUTCONSTRUCT the index plus one is
                      * appended to `uri` after the xpointer prefix. For
                      * MUTIND the xpointer is then closed, the URI is
                      * passed to search() and released. For MUTCONSTRUCT
                      * the URI stays open for a later rule to complete.
                      *
                      * The incremented index is formatted into a local
                      * buffer large enough for any int: it can need one
                      * more digit than the text that was matched. yytext
                      * is left untouched.
                      *
                      * NOTE: the strcat calls rely on `uri` having spare
                      * room; this rule does not check the capacity.
                      */
                     static const char attr[] = "noType=\"";
                     const char *index_text = yytext + (sizeof attr - 1);

                     if ((where == MUTIND) || (where == MUTCONSTRUCT)) {
                        char one_based[32];

                        tmp_n = atoi(index_text) + 1;
                        snprintf(one_based, sizeof one_based, "%d", tmp_n);
                        strcat(uri, "#xpointer(1/");
                        strcat(uri, one_based);
                     }
                     if (where == MUTIND) {
                        strcat(uri, ")");
                        search(uri, first_child, position);
                        free(uri);
                        uri = NULL;
                        where = NOWHERE;
                        first_child = AFTER;
                     }
                   }
190
"noConstr=\""{digits} {
                     /*
                      * A noConstr attribute: yytext holds the attribute
                      * name, the opening quote and the decimal index.
                      *
                      * Only meaningful for MUTCONSTRUCT: the index is
                      * appended to `uri` after a slash, the xpointer is
                      * closed, and the URI is passed to search() and
                      * released.
                      *
                      * The digits are appended straight from yytext, so
                      * no temporary copy is needed and yytext is left
                      * untouched.
                      *
                      * NOTE: the strcat calls rely on `uri` having spare
                      * room; this rule does not check the capacity.
                      */
                     static const char attr[] = "noConstr=\"";
                     const char *index_text = yytext + (sizeof attr - 1);

                     if (where == MUTCONSTRUCT) {
                        strcat(uri, "/");
                        strcat(uri, index_text);
                        strcat(uri, ")");
                        search(uri, first_child, position);
                        free(uri);
                        uri = NULL;
                        where = NOWHERE;
                        first_child = AFTER;
                     }
                   }
205
206
207
208 %%
209
210  /***************************************************************/
211  /* 6. Auxiliary functions.                                     */
212  /***************************************************************/
213
214 main(int argc, char *argv[])
215 {                  
216                    struct stat buf;
217                    init_symbol_table();
218
219                    // We process the body
220                    if (!stat("tmp/body.xml",&buf)) {
221                       yyin = fopen("tmp/body.xml", "r");
222                       position = INBODY;
223                       yylex();
224                       fclose(yyin);
225                    }
226
227                    // We process the type
228                    yyin = fopen("tmp/type.xml", "r");
229                    position = INTYPE;
230                    first_child = HERE;
231                    yylex();
232
233                    printf("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\n");
234                    printf("<rdf:RDF xml:lang=\"en\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:h=\"file:mattone.rdf#\">\n");
235                    printf("\t<h:Object rdf:about=\"");
236                    printf("%s",argv[1]);
237                    printf("\">\n");
238                    print_all();
239                    printf("\t</h:Object>\n");
240                    printf("</rdf:RDF>\n");
241                    fclose(yyin);
242                    } 
243
244 search(uri,first_child,position)
245 char               *uri;
246 int                first_child;
247 int                position; 
248 {                  
249                    if (first_child == HERE)
250                       {
251                        if (position == INHYP)
252                           found = search_bucket(uri,MAINHYP);
253                        else if (position == INCONCL)
254                           found = search_bucket(uri,MAINCONCL);
255                        /* if (found == NOTFOUND)
256                           printf( "pos = %d, uri = %s\n", MAINCONCL, uri); */
257                        }
258                     else found = search_bucket(uri,position);
259                     /* if (found == NOTFOUND)
260                           printf( "pos = %d, uri = %s\n", position, uri); */
261                     }
262
/* End-of-input hook called by the generated scanner: always returns 1. */
int yywrap()
{
    return 1;
}