1 (* Copyright (C) 2000, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://cs.unibo.it/helm/.
26 (******************************************************************************)
30 (* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
33 (* This is the main (top level) module of a parser for cic objects from xml *)
34 (* files to the internal representation. It uses the modules cicParser2 *)
35 (* (objects level) and cicParser3 (terms level) *)
37 (******************************************************************************)
(* Report the warning text w on stderr, prefixed with "WARNING: ". *)
44 prerr_endline ("WARNING: " ^ w) ;
(* Then raise Warnings; the (... : unit) constraint gives the raise        *)
(* expression the type unit.                                               *)
(* NOTE(review): the enclosing definition is not visible in this chunk -   *)
(* presumably a method of the warner class that annobj_of_xml instantiates *)
(* for the PXP configuration; confirm against the full file.               *)
45 (raise Warnings : unit)
(* Raised inside tokens_of_uri when the token list being processed is      *)
(* empty. The string payload is the string form of the offending uri, as   *)
(* produced by UriManager.string_of_uri.                                   *)
49 exception EmptyUri of string;;
51 (* given a uri u it returns the list of tokens of the base uri of u *)
52 (* e.g.: tokens_of_uri "cic:/a/b/c/d.xml" returns ["a" ; "b" ; "c"] *)
(* tokens_of_uri uri                                                       *)
(*   uri : a uri value accepted by UriManager.string_of_uri                *)
(* Converts uri to its string form, removes the first occurrence of        *)
(* "cic:", splits what is left on "/" and returns the result of running    *)
(* the token list through the local helper chop_list.                      *)
(* Raises EmptyUri (carrying the string form of uri) from the empty-list   *)
(* case below.                                                             *)
(* NOTE(review): the header of chop_list, and possibly some of its cases,  *)
(* are not visible in this chunk. Judging by the example in the comment    *)
(* preceding this function it presumably drops the final token (the file   *)
(* name) - confirm against the full file.                                  *)
53 let tokens_of_uri uri =
(* String form of the uri; reused as the payload of EmptyUri. *)
54 let uri' = UriManager.string_of_uri uri in
(* No tokens left: fail, reporting the uri that caused it. *)
57 [] -> raise (EmptyUri uri')
(* Keep the head token and recurse on the tail. *)
60 | he::tl -> he::(chop_list tl)
(* Only the first match of "cic:" is replaced (Str.replace_first); the     *)
(* regexp is not anchored to the start of the string.                      *)
62 let trimmed_uri = Str.replace_first (Str.regexp "cic:") "" uri' in
(* Break the remaining path into its "/"-separated components. *)
63 let list_of_tokens = Str.split (Str.regexp "/") trimmed_uri in
64 chop_list list_of_tokens
67 (* given the filename of an xml file of a cic object it returns its internal representation *)
(* annobj_of_xml filename filenamebody uri                                 *)
(*   filename     : path of the xml file to parse                          *)
(*   filenamebody : string option; it is matched against None / Some s     *)
(*                  below and selects whether a second document (the       *)
(*                  body) is parsed as well                                *)
(*   uri          : uri of the object; stored in CicParser3.current_uri    *)
(*                  and, tokenised, in CicParser3.current_sp               *)
(* Parses the document(s) with PXP (Pxp_yacc) and returns the result of    *)
(* CicParser2.get_term applied to the document root and the body root.     *)
(* Side effects: overwrites the two CicParser3 references, calls           *)
(* close_all on PxpUrlResolver.url_resolver after each parse, and writes   *)
(* diagnostics to stderr on the error path.                                *)
(* May raise EmptyUri through tokens_of_uri.                               *)
(* NOTE(review): several lines of this function (the bindings of doc and   *)
(* docbody, the None / Some arms, the try / with around the body) are not  *)
(* visible in this chunk; the comments below describe only what is shown.  *)
69 let annobj_of_xml filename filenamebody uri =
(* Local alias for the PXP parser module. *)
70 let module Y = Pxp_yacc in
73 (* sets the current base uri to resolve relative URIs *)
74 CicParser3.current_sp := tokens_of_uri uri ;
75 CicParser3.current_uri := uri ;
(* Default PXP configuration, except that warnings go to a fresh warner    *)
(* object.                                                                 *)
76 let config = {Y.default_config with Y.warner = new warner} in
(* Parse the main document, with PxpUrlResolver.url_resolver given as the  *)
(* ~alt resolver of Y.from_file.                                           *)
78 Y.parse_document_entity config
79 (Y.from_file ~alt:[PxpUrlResolver.url_resolver] filename)
81 (* CSC: Until PXP bug is resolved *)
82 PxpUrlResolver.url_resolver#close_all ;
83 let docroot = doc#root in
(* Decide whether a separate body document has to be parsed too. *)
84 match filenamebody with
(* Second parse, for the body document.                                    *)
(* NOTE(review): this call reads from the name filename; whether that name *)
(* is rebound to the body file by a Some pattern on a line not visible     *)
(* here cannot be determined from this chunk - verify.                     *)
88 Y.parse_document_entity config
89 (Y.from_file ~alt:[PxpUrlResolver.url_resolver] filename)
92 (* CSC: Until PXP bug is resolved *)
93 PxpUrlResolver.url_resolver#close_all ;
(* Pair of the main root and the optional body root. *)
94 docroot,Some docbody#root
(* Build the result from the two roots. *)
96 CicParser2.get_term root rootbody
(* Error path: log the file name(s) involved and the exception text on     *)
(* stderr.                                                                 *)
(* NOTE(review): presumably inside a handler binding the exception to e    *)
(* that re-raises afterwards; the handler lines are not visible here.      *)
99 prerr_endline ("Filenames: " ^ filename ^
100 (match filenamebody with None -> "" | Some s -> ", " ^ s)) ;
101 prerr_endline ("Exception: " ^ Pxp_types.string_of_exn e) ;
(* obj_of_xml filename filenamebody uri                                    *)
(* Takes the same arguments as annobj_of_xml, forwards them unchanged, and *)
(* returns the result of that call after passing it through                *)
(* Deannotate.deannotate_obj. Any exception raised by annobj_of_xml        *)
(* propagates to the caller.                                               *)
let obj_of_xml filename filenamebody uri =
  let annobj = annobj_of_xml filename filenamebody uri in
  Deannotate.deannotate_obj annobj