1 (* Copyright (C) 2004-2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
26 let debug_print = prerr_endline
30 (* ZACK TODO element from the DTD still to be handled:
31 <!ELEMENT CurrentProof (Conjecture*,body)>
32 <!ELEMENT Sequent %sequent;>
33 <!ELEMENT Conjecture %sequent;>
34 <!ELEMENT Decl %term;>
36 <!ELEMENT Hidden EMPTY>
37 <!ELEMENT Goal %term;>
43 exception Getter_failure of string * string
44 exception Parser_failure of string
47 | Arg of string * Cic.annterm (* relative uri, term *)
48 (* constants' body and types resides in differne files, thus we can't simple
49 * keep constants in Cic_obj stack entries *)
50 | Cic_attributes of Cic.attribute list
51 | Cic_constant_body of string * string * UriManager.uri list * Cic.annterm
53 (* id, for, params, body, object attributes *)
54 | Cic_constant_type of string * string * UriManager.uri list * Cic.annterm
56 (* id, name, params, type, object attributes *)
57 | Cic_term of Cic.annterm (* term *)
58 | Cic_obj of Cic.annobj (* object *)
59 | Cofix_fun of Cic.id * string * Cic.annterm * Cic.annterm
60 (* id, name, type, body *)
61 | Constructor of string * Cic.annterm (* name, type *)
62 | Decl of Cic.id * Cic.name * Cic.annterm (* id, binder, source *)
63 | Def of Cic.id * Cic.name * Cic.annterm (* id, binder, source *)
64 | Fix_fun of Cic.id * string * int * Cic.annterm * Cic.annterm
65 (* id, name, ind. index, type, body *)
66 | Inductive_type of string * string * bool * Cic.annterm *
67 (string * Cic.annterm) list (* id, name, inductive, arity, constructors *)
68 | Meta_subst of Cic.annterm option
69 | Tag of string * (string * string) list (* tag name, attributes *)
70 (* ZACK TODO add file position to tag stack entry so that when attribute
71 * errors occur, the position of their _start_tag_ could be printed
72 * instead of the current position (usually the end tag) *)
75 mutable stack: stack_entry list;
76 mutable xml_parser: XmlPushParser.xml_parser option;
77 mutable filename: string;
81 let string_of_stack ctxt =
82 "[" ^ (String.concat "; "
85 | Arg (reluri, _) -> sprintf "Arg %s" reluri
86 | Cic_attributes _ -> "Cic_attributes"
87 | Cic_constant_body (id, name, _, _, _) ->
88 sprintf "Cic_constant_body %s (id=%s)" name id
89 | Cic_constant_type (id, name, _, _, _) ->
90 sprintf "Cic_constant_type %s (id=%s)" name id
91 | Cic_term _ -> "Cic_term"
92 | Cic_obj _ -> "Cic_obj"
93 | Constructor (name, _) -> "Constructor " ^ name
94 | Cofix_fun (id, _, _, _) -> sprintf "Cofix_fun (id=%s)" id
95 | Decl (id, _, _) -> sprintf "Decl (id=%s)" id
96 | Def (id, _, _) -> sprintf "Def (id=%s)" id
97 | Fix_fun (id, _, _, _, _) -> sprintf "Fix_fun (id=%s)" id
98 | Inductive_type (id, name, _, _, _) ->
99 sprintf "Inductive_type %s (id=%s)" name id
100 | Meta_subst _ -> "Meta_subst"
101 | Tag (tag, _) -> "Tag " ^ tag)
104 let compare_attrs (a1, v1) (a2, v2) = Pervasives.compare a1 a2
105 let sort_attrs = List.sort compare_attrs
107 let new_parser_context uri = {
114 let get_parser ctxt =
115 match ctxt.xml_parser with
117 | None -> assert false
119 (** {2 Error handling} *)
121 let parse_error ctxt msg =
122 let (line, col) = XmlPushParser.get_position (get_parser ctxt) in
123 raise (Parser_failure (sprintf "[%s: line %d, column %d] %s"
124 ctxt.filename line col msg))
126 let attribute_error ctxt tag =
127 parse_error ctxt ("wrong attribute set for " ^ tag)
129 (** {2 Parsing context management} *)
132 (* debug_print "pop";*)
133 match ctxt.stack with
134 | hd :: tl -> (ctxt.stack <- tl)
138 (* debug_print "push";*)
139 ctxt.stack <- v :: ctxt.stack
142 (* debug_print "set_top";*)
143 match ctxt.stack with
144 | _ :: tl -> (ctxt.stack <- v :: tl)
147 (** pop the last tag from the open tags stack returning a pair <tag_name,
150 match ctxt.stack with
151 | Tag (tag, attrs) :: tl ->
154 | _ -> parse_error ctxt "unexpected extra content"
156 (** pop the last tag from the open tags stack returning its attributes.
157 * Attributes are returned as a list of pair <name, value> _sorted_ by
159 let pop_tag_attrs ctxt = sort_attrs (snd (pop_tag ctxt))
162 let rec aux acc stack =
164 | Cic_term t :: tl -> aux (t :: acc) tl
167 let values, new_stack = aux [] ctxt.stack in
168 ctxt.stack <- new_stack;
171 let pop_meta_substs ctxt =
172 let rec aux acc stack =
174 | Meta_subst t :: tl -> aux (t :: acc) tl
177 let values, new_stack = aux [] ctxt.stack in
178 ctxt.stack <- new_stack;
181 let pop_fix_funs ctxt =
182 let rec aux acc stack =
184 | Fix_fun (id, name, index, typ, body) :: tl ->
185 aux ((id, name, index, typ, body) :: acc) tl
188 let values, new_stack = aux [] ctxt.stack in
189 ctxt.stack <- new_stack;
192 let pop_cofix_funs ctxt =
193 let rec aux acc stack =
195 | Cofix_fun (id, name, typ, body) :: tl ->
196 aux ((id, name, typ, body) :: acc) tl
199 let values, new_stack = aux [] ctxt.stack in
200 ctxt.stack <- new_stack;
203 let pop_constructors ctxt =
204 let rec aux acc stack =
206 | Constructor (name, t) :: tl -> aux ((name, t) :: acc) tl
209 let values, new_stack = aux [] ctxt.stack in
210 ctxt.stack <- new_stack;
213 let pop_inductive_types ctxt =
214 let rec aux acc stack =
216 | Inductive_type (id, name, ind, arity, ctors) :: tl ->
217 aux ((id, name, ind, arity, ctors) :: acc) tl
220 let values, new_stack = aux [] ctxt.stack in
222 parse_error ctxt "no \"InductiveType\" element found";
223 ctxt.stack <- new_stack;
226 (** travels the stack (without popping) for the first term subject of explicit
227 * named substitution and return its URI *)
228 let find_base_uri ctxt =
229 let rec aux = function
230 | Cic_term (Cic.AConst (_, uri, _)) :: _
231 | Cic_term (Cic.AMutInd (_, uri, _, _)) :: _
232 | Cic_term (Cic.AMutConstruct (_, uri, _, _, _)) :: _
233 | Cic_term (Cic.AVar (_, uri, _)) :: _ ->
235 | Arg _ :: tl -> aux tl
236 | _ -> parse_error ctxt "no \"arg\" element found"
238 UriManager.buri_of_uri (aux ctxt.stack)
240 (** backwardly eats the stack building an explicit named substitution from Arg
242 let pop_subst ctxt base_uri =
243 let rec aux acc stack =
245 | Arg (rel_uri, term) :: tl ->
246 let uri = UriManager.uri_of_string (base_uri ^ "/" ^ rel_uri) in
247 aux ((uri, term) :: acc) tl
250 let subst, new_stack = aux [] ctxt.stack in
252 parse_error ctxt "no \"arg\" element found";
253 ctxt.stack <- new_stack;
257 match ctxt.stack with
258 | Cic_term t :: tl ->
261 | _ -> parse_error ctxt "no cic term found"
263 let pop_obj_attributes ctxt =
264 match ctxt.stack with
265 | Cic_attributes attributes :: tl ->
270 (** {2 Auxiliary functions} *)
272 let uri_of_string = UriManager.uri_of_string
274 let uri_list_of_string =
275 let space_RE = Str.regexp " " in
277 List.map uri_of_string (Str.split space_RE s)
279 let sort_of_string ctxt = function
283 (* CicUniv.restart_numbering (); |+ useful only to test parser +| *)
284 Cic.Type (CicUniv.fresh ~uri:ctxt.uri ())
285 | _ -> parse_error ctxt "sort expected"
287 let patch_subst ctxt subst = function
288 | Cic.AConst (id, uri, _) -> Cic.AConst (id, uri, subst)
289 | Cic.AMutInd (id, uri, typeno, _) ->
290 Cic.AMutInd (id, uri, typeno, subst)
291 | Cic.AMutConstruct (id, uri, typeno, consno, _) ->
292 Cic.AMutConstruct (id, uri, typeno, consno, subst)
293 | Cic.AVar (id, uri, _) -> Cic.AVar (id, uri, subst)
296 ("only \"CONST\", \"VAR\", \"MUTIND\", and \"MUTCONSTRUCT\" can be" ^
299 (** backwardly eats the stack seeking for the first open tag carrying
300 * "helm:exception" attributes. If found return Some of a pair containing
301 * exception name and argument. Return None otherwise *)
302 let find_helm_exception ctxt =
303 let rec aux = function
305 | Tag (_, attrs) :: tl ->
307 let exn = List.assoc "helm:exception" attrs in
309 try List.assoc "helm:exception_arg" attrs with Not_found -> ""
312 with Not_found -> aux tl)
317 (** {2 Push parser callbacks}
318 * each callback needs to be instantiated to a parsing context *)
320 let start_element ctxt tag attrs =
321 (* debug_print (sprintf "<%s%s>" tag (match attrs with | [] -> "" | _ -> " " ^ String.concat " " (List.map (fun (a,v) -> sprintf "%s=\"%s\"" a v) attrs)));*)
322 push ctxt (Tag (tag, attrs))
324 let end_element ctxt tag =
325 (* debug_print (sprintf "</%s>" tag);*)
326 (* debug_print (string_of_stack ctxt);*)
327 let attribute_error () = attribute_error ctxt tag in
328 let parse_error = parse_error ctxt in
329 let sort_of_string = sort_of_string ctxt in
333 (match pop_tag_attrs ctxt with
334 | ["binder", binder; "id", id; "idref", idref; "sort", _;
336 Cic.ARel (id, idref, int_of_string value, binder)
337 | _ -> attribute_error ()))
340 (match pop_tag_attrs ctxt with
341 | ["id", id; "sort", _; "uri", uri] ->
342 Cic.AVar (id, uri_of_string uri, [])
343 | _ -> attribute_error ()))
346 (match pop_tag_attrs ctxt with
347 | ["id", id; "sort", _; "uri", uri] ->
348 Cic.AConst (id, uri_of_string uri, [])
349 | _ -> attribute_error ()))
352 (match pop_tag_attrs ctxt with
353 | ["id", id; "value", sort] -> Cic.ASort (id, sort_of_string sort)
354 | _ -> attribute_error ()))
356 let args = pop_cics ctxt in
358 (match pop_tag_attrs ctxt with
359 | ["id", id; "sort", _] -> Cic.AAppl (id, args)
360 | _ -> attribute_error ()))
362 let source = pop_cic ctxt in
364 (match pop_tag_attrs ctxt with
365 | ["binder", binder; "id", id; "type", _] ->
366 Decl (id, Cic.Name binder, source)
367 | ["id", id; "type", _] -> Decl (id, Cic.Anonymous, source)
368 | _ -> attribute_error ())
369 | "def" -> (* same as "decl" above *)
370 let source = pop_cic ctxt in
372 (match pop_tag_attrs ctxt with
373 | ["binder", binder; "id", id; "sort", _] ->
374 Def (id, Cic.Name binder, source)
375 | ["id", id; "sort", _] -> Def (id, Cic.Anonymous, source)
376 | _ -> attribute_error ())
377 | "arity" (* transparent elements (i.e. which contain a CIC) *)
385 let term = pop_cic ctxt in
386 pop ctxt; (* pops start tag matching current end tag (e.g. <arity>) *)
387 push ctxt (Cic_term term)
388 | "substitution" -> (* optional transparent elements (i.e. which _may_
390 set_top ctxt (* replace <substitution> *)
391 (match ctxt.stack with
392 | Cic_term term :: tl ->
394 (Meta_subst (Some term))
395 | _ -> Meta_subst None)
397 let target = pop_cic ctxt in
398 let rec add_decl target = function
399 | Decl (id, binder, source) :: tl ->
400 add_decl (Cic.AProd (id, binder, source, target)) tl
405 let term = add_decl target ctxt.stack in
406 (match pop_tag_attrs ctxt with
408 | _ -> attribute_error ());
409 push ctxt (Cic_term term)
411 let target = pop_cic ctxt in
412 let rec add_decl target = function
413 | Decl (id, binder, source) :: tl ->
414 add_decl (Cic.ALambda (id, binder, source, target)) tl
419 let term = add_decl target ctxt.stack in
420 (match pop_tag_attrs ctxt with
422 | _ -> attribute_error ());
423 push ctxt (Cic_term term)
425 let target = pop_cic ctxt in
426 let rec add_def target = function
427 | Def (id, binder, source) :: tl ->
428 add_def (Cic.ALetIn (id, binder, source, target)) tl
433 let term = add_def target ctxt.stack in
434 (match pop_tag_attrs ctxt with
436 | _ -> attribute_error ());
437 push ctxt (Cic_term term)
439 let typ = pop_cic ctxt in
440 let term = pop_cic ctxt in
442 (match pop_tag_attrs ctxt with
443 | ["id", id; "sort", _] -> Cic.ACast (id, term, typ)
444 | _ -> attribute_error ()));
447 (match pop_tag_attrs ctxt with
448 | ["id", id] -> Cic.AImplicit (id, None)
449 | ["annotation", annotation; "id", id] ->
450 let implicit_annotation =
451 match annotation with
452 | "closed" -> `Closed
455 | _ -> parse_error "invalid value for \"annotation\" attribute"
457 Cic.AImplicit (id, Some implicit_annotation)
458 | _ -> attribute_error ()))
460 let meta_substs = pop_meta_substs ctxt in
462 (match pop_tag_attrs ctxt with
463 | ["id", id; "no", no; "sort", _] ->
464 Cic.AMeta (id, int_of_string no, meta_substs)
465 | _ -> attribute_error ()));
468 (match pop_tag_attrs ctxt with
469 | ["id", id; "noType", noType; "uri", uri] ->
470 Cic.AMutInd (id, uri_of_string uri, int_of_string noType, [])
471 | _ -> attribute_error ()));
474 (match pop_tag_attrs ctxt with
475 | ["id", id; "noConstr", noConstr; "noType", noType; "sort", _;
477 Cic.AMutConstruct (id, uri_of_string uri, int_of_string noType,
478 int_of_string noConstr, [])
479 | _ -> attribute_error ()));
481 let body = pop_cic ctxt in
482 let typ = pop_cic ctxt in
484 (match pop_tag_attrs ctxt with
485 | ["id", id; "name", name; "recIndex", recIndex] ->
486 Fix_fun (id, name, int_of_string recIndex, typ, body)
487 | _ -> attribute_error ())
489 let body = pop_cic ctxt in
490 let typ = pop_cic ctxt in
492 (match pop_tag_attrs ctxt with
493 | ["id", id; "name", name] ->
494 Cofix_fun (id, name, typ, body)
495 | _ -> attribute_error ())
497 let fix_funs = pop_fix_funs ctxt in
499 (match pop_tag_attrs ctxt with
500 | ["id", id; "noFun", noFun; "sort", _] ->
501 Cic.AFix (id, int_of_string noFun, fix_funs)
502 | _ -> attribute_error ()))
504 let cofix_funs = pop_cofix_funs ctxt in
506 (match pop_tag_attrs ctxt with
507 | ["id", id; "noFun", noFun; "sort", _] ->
508 Cic.ACoFix (id, int_of_string noFun, cofix_funs)
509 | _ -> attribute_error ()))
511 (match pop_cics ctxt with
512 | patternsType :: inductiveTerm :: patterns ->
514 (match pop_tag_attrs ctxt with
515 | ["id", id; "noType", noType; "sort", _; "uriType", uriType] ->
516 Cic.AMutCase (id, uri_of_string uriType, int_of_string noType,
517 patternsType, inductiveTerm, patterns)
518 | _ -> attribute_error ()))
519 | _ -> parse_error "invalid \"MUTCASE\" content")
521 let typ = pop_cic ctxt in
523 (match pop_tag_attrs ctxt with
524 | ["name", name] -> Constructor (name, typ)
525 | _ -> attribute_error ())
527 let constructors = pop_constructors ctxt in
528 let arity = pop_cic ctxt in
530 (match pop_tag_attrs ctxt with
531 | ["id", id; "inductive", inductive; "name", name] ->
532 Inductive_type (id, name, bool_of_string inductive, arity,
534 | _ -> attribute_error ())
535 | "InductiveDefinition" ->
536 let inductive_types = pop_inductive_types ctxt in
537 let obj_attributes = pop_obj_attributes ctxt in
539 (match pop_tag_attrs ctxt with
540 | ["id", id; "noParams", noParams; "params", params] ->
541 Cic.AInductiveDefinition (id, inductive_types,
542 uri_list_of_string params, int_of_string noParams, obj_attributes)
543 | _ -> attribute_error ()))
545 let typ = pop_cic ctxt in
546 let obj_attributes = pop_obj_attributes ctxt in
548 (match pop_tag_attrs ctxt with
549 | ["id", id; "name", name; "params", params] ->
550 Cic_constant_type (id, name, uri_list_of_string params, typ,
552 | _ -> attribute_error ())
554 let body = pop_cic ctxt in
555 let obj_attributes = pop_obj_attributes ctxt in
557 (match pop_tag_attrs ctxt with
558 | ["for", for_; "id", id; "params", params] ->
559 Cic_constant_body (id, for_, uri_list_of_string params, body,
561 | _ -> attribute_error ())
563 let typ = pop_cic ctxt in
565 match pop_cics ctxt with
568 | _ -> parse_error "wrong content for \"Variable\""
570 let obj_attributes = pop_obj_attributes ctxt in
572 (match pop_tag_attrs ctxt with
573 | ["id", id; "name", name; "params", params] ->
574 Cic.AVariable (id, name, body, typ, uri_list_of_string params,
576 | _ -> attribute_error ()))
578 let term = pop_cic ctxt in
580 (match pop_tag_attrs ctxt with
581 | ["relUri", relUri] -> Arg (relUri, term)
582 | _ -> attribute_error ())
584 (* explicit named substitution handling: when the end tag of an element
585 * subject of exlicit named subst (MUTIND, MUTCONSTRUCT, CONST, VAR) it
586 * is stored on the stack with no substitutions (i.e. []). When the end
587 * tag of an "instantiate" element is found we patch the term currently
588 * on the stack with the substitution built from "instantiate" children
590 (* XXX inefficiency here: first travels the <arg> elements in order to
591 * find the baseUri, then in order to build the explicit named subst *)
592 let base_uri = find_base_uri ctxt in
593 let subst = pop_subst ctxt base_uri in
594 let term = pop_cic ctxt in
595 (* comment from CicParser3.ml:
596 * CSC: the "id" optional attribute should be parsed and reflected in
597 * Cic.annterm and id = string_of_xml_attr (n#attribute "id") *)
598 (* replace <instantiate> *)
599 set_top ctxt (Cic_term (patch_subst ctxt subst term))
601 let (_, attrs) = pop_tag ctxt in
602 let rec mk_obj_attributes acc = function
604 | ("generated", "true") :: tl ->
605 mk_obj_attributes (`Generated :: acc) tl
606 | ("class", "coercion") :: tl ->
607 mk_obj_attributes (`Class `Coercion :: acc) tl
608 | ("class", "record") :: tl ->
609 mk_obj_attributes (`Class `Record :: acc) tl
610 | ("class", "projection") :: tl ->
611 mk_obj_attributes (`Class `Projection :: acc) tl
612 | ("class", "elimProp") :: tl ->
613 mk_obj_attributes (`Class (`Elim Cic.Prop) :: acc) tl
614 | ("class", "elimSet") :: tl ->
615 mk_obj_attributes (`Class (`Elim Cic.Set) :: acc) tl
616 | ("class", "elimType") :: tl ->
617 let univ = CicUniv.fresh ~uri:ctxt.uri () in
618 mk_obj_attributes (`Class (`Elim (Cic.Type univ)) :: acc) tl
619 | _ -> attribute_error ()
621 push ctxt (Cic_attributes (mk_obj_attributes [] attrs))
623 match find_helm_exception ctxt with
624 | Some (exn, arg) -> raise (Getter_failure (exn, arg))
625 | None -> parse_error (sprintf "unknown element \"%s\"" tag)
627 (** {2 Parser internals} *)
629 let parse uri filename =
630 let ctxt = new_parser_context uri in
631 ctxt.filename <- filename;
632 let module P = XmlPushParser in
634 P.default_callbacks with
635 P.start_element = Some (start_element ctxt);
636 P.end_element = Some (end_element ctxt);
638 let xml_parser = P.create_parser callbacks in
639 ctxt.xml_parser <- Some xml_parser;
642 P.parse xml_parser (`Gzip_file filename);
644 ctxt.xml_parser <- None;
645 (* ZACK: the above "<- None" is vital for garbage collection. Without it
646 * we keep in memory a circular structure parser -> callbacks -> ctxt ->
647 * parser. I don't know if the ocaml garbage collector is supposed to
648 * collect such structures, but for sure the expat bindings will (orribly)
649 * leak when used in conjunction with such structures *)
651 ctxt.xml_parser <- None; (* ZACK: same comment as above *)
652 (* debug_print (string_of_stack stack);*)
653 (* assert (List.length ctxt.stack = 1) *)
656 | Failure "int_of_string" -> parse_error ctxt "integer number expected"
657 | Invalid_argument "bool_of_string" -> parse_error ctxt "boolean expected"
658 | P.Parse_error msg -> parse_error ctxt ("parse error: " ^ msg)
660 | Getter_failure _ as exn ->
663 raise (Parser_failure ("uncaught exception: " ^ Printexc.to_string exn))
665 (** {2 API implementation} *)
667 let annobj_of_xml uri filename filenamebody =
668 match filenamebody with
670 (match parse uri filename with
671 | Cic_constant_type (id, name, params, typ, obj_attributes) ->
672 Cic.AConstant (id, None, name, None, typ, params, obj_attributes)
674 | _ -> raise (Parser_failure ("no object found in " ^ filename)))
675 | Some filenamebody ->
676 (match parse uri filename, parse uri filenamebody with
677 | Cic_constant_type (type_id, name, params, typ, obj_attributes),
678 Cic_constant_body (body_id, _, _, body, _) ->
679 Cic.AConstant (type_id, Some body_id, name, Some body, typ, params,[])
681 raise (Parser_failure (sprintf "no constant found in %s, %s"
682 filename filenamebody)))
684 let obj_of_xml uri filename filenamebody =
685 Deannotate.deannotate_obj (annobj_of_xml uri filename filenamebody)