1 (* Copyright (C) 2004-2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
(** [debug_print msg] writes [msg], followed by a newline, on standard error.
    It is an ad-hoc tracing aid: the tracing calls visible in this part of the
    file are all commented out. *)
let debug_print msg = prerr_endline msg
(* ZACK TODO elements from the DTD still to be handled:
31 <!ELEMENT CurrentProof (Conjecture*,body)>
32 <!ELEMENT Sequent %sequent;>
33 <!ELEMENT Conjecture %sequent;>
34 <!ELEMENT Decl %term;>
36 <!ELEMENT Hidden EMPTY>
37 <!ELEMENT Goal %term;>
(** [Getter_failure (name, arg)] is raised from [end_element] when
    [find_helm_exception] finds an open tag carrying a "helm:exception"
    attribute: [name] is that attribute's value and [arg] the value of
    "helm:exception_arg" (the empty string when that attribute is absent). *)
exception Getter_failure of string * string

(** [Parser_failure msg] signals that a document could not be parsed. When
    raised through [parse_error], [msg] starts with the file name and the
    parser's current line and column. *)
exception Parser_failure of string
47 | Arg of string * Cic.annterm (* relative uri, term *)
(* a constant's body and type reside in different files, thus we can't simply
 * keep constants in Cic_obj stack entries *)
50 | Cic_attributes of Cic.attribute list
51 | Cic_constant_body of string * string * UriManager.uri list * Cic.annterm
53 (* id, for, params, body, object attributes *)
54 | Cic_constant_type of string * string * UriManager.uri list * Cic.annterm
56 (* id, name, params, type, object attributes *)
57 | Cic_term of Cic.annterm (* term *)
58 | Cic_obj of Cic.annobj (* object *)
59 | Cofix_fun of Cic.id * string * Cic.annterm * Cic.annterm
60 (* id, name, type, body *)
61 | Constructor of string * Cic.annterm (* name, type *)
62 | Decl of Cic.id * Cic.name * Cic.annterm (* id, binder, source *)
63 | Def of Cic.id * Cic.name * Cic.annterm (* id, binder, source *)
64 | Fix_fun of Cic.id * string * int * Cic.annterm * Cic.annterm
65 (* id, name, ind. index, type, body *)
66 | Inductive_type of string * string * bool * Cic.annterm *
67 (string * Cic.annterm) list (* id, name, inductive, arity, constructors *)
68 | Meta_subst of Cic.annterm option
69 | Obj_class of Cic.object_class
70 | Obj_field of string (* field name *)
72 | Tag of string * (string * string) list (* tag name, attributes *)
73 (* ZACK TODO add file position to tag stack entry so that when attribute
74 * errors occur, the position of their _start_tag_ could be printed
75 * instead of the current position (usually the end tag) *)
78 mutable stack: stack_entry list;
79 mutable xml_parser: XmlPushParser.xml_parser option;
80 mutable filename: string;
84 let string_of_stack ctxt =
85 "[" ^ (String.concat "; "
88 | Arg (reluri, _) -> sprintf "Arg %s" reluri
89 | Cic_attributes _ -> "Cic_attributes"
90 | Cic_constant_body (id, name, _, _, _) ->
91 sprintf "Cic_constant_body %s (id=%s)" name id
92 | Cic_constant_type (id, name, _, _, _) ->
93 sprintf "Cic_constant_type %s (id=%s)" name id
94 | Cic_term _ -> "Cic_term"
95 | Cic_obj _ -> "Cic_obj"
96 | Constructor (name, _) -> "Constructor " ^ name
97 | Cofix_fun (id, _, _, _) -> sprintf "Cofix_fun (id=%s)" id
98 | Decl (id, _, _) -> sprintf "Decl (id=%s)" id
99 | Def (id, _, _) -> sprintf "Def (id=%s)" id
100 | Fix_fun (id, _, _, _, _) -> sprintf "Fix_fun (id=%s)" id
101 | Inductive_type (id, name, _, _, _) ->
102 sprintf "Inductive_type %s (id=%s)" name id
103 | Meta_subst _ -> "Meta_subst"
104 | Obj_class _ -> "Obj_class"
105 | Obj_field name -> "Obj_field " ^ name
106 | Obj_generated -> "Obj_generated"
107 | Tag (tag, _) -> "Tag " ^ tag)
(** [compare_attrs (name1, _) (name2, _)] orders two XML attributes by their
    names only; the attribute values are ignored. The result follows the usual
    convention (negative, zero, positive), so it can be passed to [List.sort]. *)
let compare_attrs (name1, _) (name2, _) =
  (* Unqualified [compare] is the same polymorphic comparison the
   * [Pervasives] module used to export; that module no longer exists in
   * recent OCaml releases. *)
  compare name1 name2
(** [sort_attrs attrs] returns the attribute list [attrs] sorted by attribute
    name, using [compare_attrs]. Sorting gives attribute lists a canonical
    order, which lets [end_element] match them against fixed list patterns
    whose names are written in alphabetical order. *)
let sort_attrs attrs = List.sort compare_attrs attrs
113 let new_parser_context uri = {
120 let get_parser ctxt =
121 match ctxt.xml_parser with
123 | None -> assert false
125 (** {2 Error handling} *)
(** [parse_error ctxt msg] aborts parsing: it never returns normally.
    The raised message is [msg] prefixed with
    "[<file name>: line <l>, column <c>]", where the position is the one
    currently reported by the XML parser attached to [ctxt].

    [ctxt] must have a parser attached: [get_parser] fails with an assertion
    otherwise.

    @raise Parser_failure always. *)
let parse_error ctxt msg =
  let line, col = XmlPushParser.get_position (get_parser ctxt) in
  let located_msg =
    sprintf "[%s: line %d, column %d] %s" ctxt.filename line col msg
  in
  raise (Parser_failure located_msg)
(** [attribute_error ctxt tag] reports, through [parse_error], that the
    attributes found on element [tag] do not form an accepted attribute set.
    Like [parse_error] it never returns normally.

    @raise Parser_failure always. *)
let attribute_error ctxt tag =
  let message = "wrong attribute set for " ^ tag in
  parse_error ctxt message
135 (** {2 Parsing context management} *)
138 (* debug_print "pop";*)
139 match ctxt.stack with
140 | hd :: tl -> (ctxt.stack <- tl)
144 (* debug_print "push";*)
145 ctxt.stack <- v :: ctxt.stack
148 (* debug_print "set_top";*)
149 match ctxt.stack with
150 | _ :: tl -> (ctxt.stack <- v :: tl)
153 (** pop the last tag from the open tags stack returning a pair <tag_name,
156 match ctxt.stack with
157 | Tag (tag, attrs) :: tl ->
160 | _ -> parse_error ctxt "unexpected extra content"
(** pop the last tag from the open tags stack returning its attributes.
 * Attributes are returned as a list of pairs <name, value> _sorted_ by
 * attribute name (see [sort_attrs]); the tag name is discarded.
 * The popping itself is done by [pop_tag], which reports a parse error
 * ("unexpected extra content") when the top of the stack is not a tag. *)
let pop_tag_attrs ctxt =
  let (_, attrs) = pop_tag ctxt in
  sort_attrs attrs
168 let rec aux acc stack =
170 | Cic_term t :: tl -> aux (t :: acc) tl
173 let values, new_stack = aux [] ctxt.stack in
174 ctxt.stack <- new_stack;
177 let pop_class_modifiers ctxt =
178 let rec aux acc stack =
180 | (Cic_term (Cic.ASort _) as m) :: tl
181 | (Obj_field _ as m) :: tl ->
185 let values, new_stack = aux [] ctxt.stack in
186 ctxt.stack <- new_stack;
189 let pop_meta_substs ctxt =
190 let rec aux acc stack =
192 | Meta_subst t :: tl -> aux (t :: acc) tl
195 let values, new_stack = aux [] ctxt.stack in
196 ctxt.stack <- new_stack;
199 let pop_fix_funs ctxt =
200 let rec aux acc stack =
202 | Fix_fun (id, name, index, typ, body) :: tl ->
203 aux ((id, name, index, typ, body) :: acc) tl
206 let values, new_stack = aux [] ctxt.stack in
207 ctxt.stack <- new_stack;
210 let pop_cofix_funs ctxt =
211 let rec aux acc stack =
213 | Cofix_fun (id, name, typ, body) :: tl ->
214 aux ((id, name, typ, body) :: acc) tl
217 let values, new_stack = aux [] ctxt.stack in
218 ctxt.stack <- new_stack;
221 let pop_constructors ctxt =
222 let rec aux acc stack =
224 | Constructor (name, t) :: tl -> aux ((name, t) :: acc) tl
227 let values, new_stack = aux [] ctxt.stack in
228 ctxt.stack <- new_stack;
231 let pop_inductive_types ctxt =
232 let rec aux acc stack =
234 | Inductive_type (id, name, ind, arity, ctors) :: tl ->
235 aux ((id, name, ind, arity, ctors) :: acc) tl
238 let values, new_stack = aux [] ctxt.stack in
240 parse_error ctxt "no \"InductiveType\" element found";
241 ctxt.stack <- new_stack;
(** traverses the stack (without popping) looking for the first term that is
 * the subject of an explicit named substitution and returns its base URI *)
246 let find_base_uri ctxt =
247 let rec aux = function
248 | Cic_term (Cic.AConst (_, uri, _)) :: _
249 | Cic_term (Cic.AMutInd (_, uri, _, _)) :: _
250 | Cic_term (Cic.AMutConstruct (_, uri, _, _, _)) :: _
251 | Cic_term (Cic.AVar (_, uri, _)) :: _ ->
253 | Arg _ :: tl -> aux tl
254 | _ -> parse_error ctxt "no \"arg\" element found"
256 UriManager.buri_of_uri (aux ctxt.stack)
258 (** backwardly eats the stack building an explicit named substitution from Arg
260 let pop_subst ctxt base_uri =
261 let rec aux acc stack =
263 | Arg (rel_uri, term) :: tl ->
264 let uri = UriManager.uri_of_string (base_uri ^ "/" ^ rel_uri) in
265 aux ((uri, term) :: acc) tl
268 let subst, new_stack = aux [] ctxt.stack in
270 parse_error ctxt "no \"arg\" element found";
271 ctxt.stack <- new_stack;
275 match ctxt.stack with
276 | Cic_term t :: tl ->
279 | _ -> parse_error ctxt "no cic term found"
281 let pop_obj_attributes ctxt =
282 match ctxt.stack with
283 | Cic_attributes attributes :: tl ->
288 (** {2 Auxiliary functions} *)
(** Local shorthand for [UriManager.uri_of_string], used wherever an XML
    attribute value has to be turned into a URI. Kept as a plain alias so that
    its type is exactly the one of the aliased function. *)
let uri_of_string = UriManager.uri_of_string
292 let uri_list_of_string =
293 let space_RE = Str.regexp " " in
295 List.map uri_of_string (Str.split space_RE s)
297 let sort_of_string ctxt = function
300 | "Type" -> Cic.Type (CicUniv.fresh ~uri:ctxt.uri ())
301 (* | "Type" -> CicUniv.restart_numbering (); |+ useful only to test parser +| *)
302 | "CProp" -> Cic.CProp
303 | _ -> parse_error ctxt "sort expected"
305 let patch_subst ctxt subst = function
306 | Cic.AConst (id, uri, _) -> Cic.AConst (id, uri, subst)
307 | Cic.AMutInd (id, uri, typeno, _) ->
308 Cic.AMutInd (id, uri, typeno, subst)
309 | Cic.AMutConstruct (id, uri, typeno, consno, _) ->
310 Cic.AMutConstruct (id, uri, typeno, consno, subst)
311 | Cic.AVar (id, uri, _) -> Cic.AVar (id, uri, subst)
314 ("only \"CONST\", \"VAR\", \"MUTIND\", and \"MUTCONSTRUCT\" can be" ^
317 (** backwardly eats the stack seeking for the first open tag carrying
318 * "helm:exception" attributes. If found return Some of a pair containing
319 * exception name and argument. Return None otherwise *)
320 let find_helm_exception ctxt =
321 let rec aux = function
323 | Tag (_, attrs) :: tl ->
325 let exn = List.assoc "helm:exception" attrs in
327 try List.assoc "helm:exception_arg" attrs with Not_found -> ""
330 with Not_found -> aux tl)
335 (** {2 Push parser callbacks}
336 * each callback needs to be instantiated to a parsing context *)
(** [start_element ctxt tag attrs] is the start-tag callback of the push
    parser. It only records the opening of element [tag] by pushing a
    [Tag (tag, attrs)] entry on the stack of [ctxt]; the attributes are
    looked at later, when [end_element] pops the entry back. *)
let start_element ctxt tag attrs =
(*  debug_print (sprintf "<%s%s>" tag (match attrs with | [] -> "" | _ -> " " ^ String.concat " " (List.map (fun (a,v) -> sprintf "%s=\"%s\"" a v) attrs)));*)
  let entry = Tag (tag, attrs) in
  push ctxt entry
342 let end_element ctxt tag =
343 (* debug_print (sprintf "</%s>" tag);*)
344 (* debug_print (string_of_stack ctxt);*)
345 let attribute_error () = attribute_error ctxt tag in
346 let parse_error = parse_error ctxt in
347 let sort_of_string = sort_of_string ctxt in
351 (match pop_tag_attrs ctxt with
352 | ["binder", binder; "id", id; "idref", idref; "sort", _;
354 Cic.ARel (id, idref, int_of_string value, binder)
355 | _ -> attribute_error ()))
358 (match pop_tag_attrs ctxt with
359 | ["id", id; "sort", _; "uri", uri] ->
360 Cic.AVar (id, uri_of_string uri, [])
361 | _ -> attribute_error ()))
364 (match pop_tag_attrs ctxt with
365 | ["id", id; "sort", _; "uri", uri] ->
366 Cic.AConst (id, uri_of_string uri, [])
367 | _ -> attribute_error ()))
370 (match pop_tag_attrs ctxt with
371 | ["id", id; "value", sort] -> Cic.ASort (id, sort_of_string sort)
372 | _ -> attribute_error ()))
374 let args = pop_cics ctxt in
376 (match pop_tag_attrs ctxt with
377 | ["id", id; "sort", _] -> Cic.AAppl (id, args)
378 | _ -> attribute_error ()))
380 let source = pop_cic ctxt in
382 (match pop_tag_attrs ctxt with
383 | ["binder", binder; "id", id; "type", _] ->
384 Decl (id, Cic.Name binder, source)
385 | ["id", id; "type", _] -> Decl (id, Cic.Anonymous, source)
386 | _ -> attribute_error ())
387 | "def" -> (* same as "decl" above *)
388 let source = pop_cic ctxt in
390 (match pop_tag_attrs ctxt with
391 | ["binder", binder; "id", id; "sort", _] ->
392 Def (id, Cic.Name binder, source)
393 | ["id", id; "sort", _] -> Def (id, Cic.Anonymous, source)
394 | _ -> attribute_error ())
395 | "arity" (* transparent elements (i.e. which contain a CIC) *)
403 let term = pop_cic ctxt in
404 pop ctxt; (* pops start tag matching current end tag (e.g. <arity>) *)
405 push ctxt (Cic_term term)
406 | "substitution" -> (* optional transparent elements (i.e. which _may_
408 set_top ctxt (* replace <substitution> *)
409 (match ctxt.stack with
410 | Cic_term term :: tl ->
412 (Meta_subst (Some term))
413 | _ -> Meta_subst None)
415 let target = pop_cic ctxt in
416 let rec add_decl target = function
417 | Decl (id, binder, source) :: tl ->
418 add_decl (Cic.AProd (id, binder, source, target)) tl
423 let term = add_decl target ctxt.stack in
424 (match pop_tag_attrs ctxt with
426 | _ -> attribute_error ());
427 push ctxt (Cic_term term)
429 let target = pop_cic ctxt in
430 let rec add_decl target = function
431 | Decl (id, binder, source) :: tl ->
432 add_decl (Cic.ALambda (id, binder, source, target)) tl
437 let term = add_decl target ctxt.stack in
438 (match pop_tag_attrs ctxt with
440 | _ -> attribute_error ());
441 push ctxt (Cic_term term)
443 let target = pop_cic ctxt in
444 let rec add_def target = function
445 | Def (id, binder, source) :: tl ->
446 add_def (Cic.ALetIn (id, binder, source, target)) tl
451 let term = add_def target ctxt.stack in
452 (match pop_tag_attrs ctxt with
454 | _ -> attribute_error ());
455 push ctxt (Cic_term term)
457 let typ = pop_cic ctxt in
458 let term = pop_cic ctxt in
460 (match pop_tag_attrs ctxt with
461 | ["id", id; "sort", _] -> Cic.ACast (id, term, typ)
462 | _ -> attribute_error ()));
465 (match pop_tag_attrs ctxt with
466 | ["id", id] -> Cic.AImplicit (id, None)
467 | ["annotation", annotation; "id", id] ->
468 let implicit_annotation =
469 match annotation with
470 | "closed" -> `Closed
473 | _ -> parse_error "invalid value for \"annotation\" attribute"
475 Cic.AImplicit (id, Some implicit_annotation)
476 | _ -> attribute_error ()))
478 let meta_substs = pop_meta_substs ctxt in
480 (match pop_tag_attrs ctxt with
481 | ["id", id; "no", no; "sort", _] ->
482 Cic.AMeta (id, int_of_string no, meta_substs)
483 | _ -> attribute_error ()));
486 (match pop_tag_attrs ctxt with
487 | ["id", id; "noType", noType; "uri", uri] ->
488 Cic.AMutInd (id, uri_of_string uri, int_of_string noType, [])
489 | _ -> attribute_error ()));
492 (match pop_tag_attrs ctxt with
493 | ["id", id; "noConstr", noConstr; "noType", noType; "sort", _;
495 Cic.AMutConstruct (id, uri_of_string uri, int_of_string noType,
496 int_of_string noConstr, [])
497 | _ -> attribute_error ()));
499 let body = pop_cic ctxt in
500 let typ = pop_cic ctxt in
502 (match pop_tag_attrs ctxt with
503 | ["id", id; "name", name; "recIndex", recIndex] ->
504 Fix_fun (id, name, int_of_string recIndex, typ, body)
505 | _ -> attribute_error ())
507 let body = pop_cic ctxt in
508 let typ = pop_cic ctxt in
510 (match pop_tag_attrs ctxt with
511 | ["id", id; "name", name] ->
512 Cofix_fun (id, name, typ, body)
513 | _ -> attribute_error ())
515 let fix_funs = pop_fix_funs ctxt in
517 (match pop_tag_attrs ctxt with
518 | ["id", id; "noFun", noFun; "sort", _] ->
519 Cic.AFix (id, int_of_string noFun, fix_funs)
520 | _ -> attribute_error ()))
522 let cofix_funs = pop_cofix_funs ctxt in
524 (match pop_tag_attrs ctxt with
525 | ["id", id; "noFun", noFun; "sort", _] ->
526 Cic.ACoFix (id, int_of_string noFun, cofix_funs)
527 | _ -> attribute_error ()))
529 (match pop_cics ctxt with
530 | patternsType :: inductiveTerm :: patterns ->
532 (match pop_tag_attrs ctxt with
533 | ["id", id; "noType", noType; "sort", _; "uriType", uriType] ->
534 Cic.AMutCase (id, uri_of_string uriType, int_of_string noType,
535 patternsType, inductiveTerm, patterns)
536 | _ -> attribute_error ()))
537 | _ -> parse_error "invalid \"MUTCASE\" content")
539 let typ = pop_cic ctxt in
541 (match pop_tag_attrs ctxt with
542 | ["name", name] -> Constructor (name, typ)
543 | _ -> attribute_error ())
545 let constructors = pop_constructors ctxt in
546 let arity = pop_cic ctxt in
548 (match pop_tag_attrs ctxt with
549 | ["id", id; "inductive", inductive; "name", name] ->
550 Inductive_type (id, name, bool_of_string inductive, arity,
552 | _ -> attribute_error ())
553 | "InductiveDefinition" ->
554 let inductive_types = pop_inductive_types ctxt in
555 let obj_attributes = pop_obj_attributes ctxt in
557 (match pop_tag_attrs ctxt with
558 | ["id", id; "noParams", noParams; "params", params] ->
559 Cic.AInductiveDefinition (id, inductive_types,
560 uri_list_of_string params, int_of_string noParams, obj_attributes)
561 | _ -> attribute_error ()))
563 let typ = pop_cic ctxt in
564 let obj_attributes = pop_obj_attributes ctxt in
566 (match pop_tag_attrs ctxt with
567 | ["id", id; "name", name; "params", params] ->
568 Cic_constant_type (id, name, uri_list_of_string params, typ,
570 | _ -> attribute_error ())
572 let body = pop_cic ctxt in
573 let obj_attributes = pop_obj_attributes ctxt in
575 (match pop_tag_attrs ctxt with
576 | ["for", for_; "id", id; "params", params] ->
577 Cic_constant_body (id, for_, uri_list_of_string params, body,
579 | _ -> attribute_error ())
581 let typ = pop_cic ctxt in
583 match pop_cics ctxt with
586 | _ -> parse_error "wrong content for \"Variable\""
588 let obj_attributes = pop_obj_attributes ctxt in
590 (match pop_tag_attrs ctxt with
591 | ["id", id; "name", name; "params", params] ->
592 Cic.AVariable (id, name, body, typ, uri_list_of_string params,
594 | _ -> attribute_error ()))
596 let term = pop_cic ctxt in
598 (match pop_tag_attrs ctxt with
599 | ["relUri", relUri] -> Arg (relUri, term)
600 | _ -> attribute_error ())
(* explicit named substitution handling: when the end tag of an element
 * which is the subject of an explicit named subst (MUTIND, MUTCONSTRUCT,
 * CONST, VAR) is found, the element is stored on the stack with no
 * substitutions (i.e. []). When the end tag of an "instantiate" element is
 * found we patch the term currently on the stack with the substitution
 * built from "instantiate" children
608 (* XXX inefficiency here: first travels the <arg> elements in order to
609 * find the baseUri, then in order to build the explicit named subst *)
610 let base_uri = find_base_uri ctxt in
611 let subst = pop_subst ctxt base_uri in
612 let term = pop_cic ctxt in
613 (* comment from CicParser3.ml:
614 * CSC: the "id" optional attribute should be parsed and reflected in
615 * Cic.annterm and id = string_of_xml_attr (n#attribute "id") *)
616 (* replace <instantiate> *)
617 set_top ctxt (Cic_term (patch_subst ctxt subst term))
619 let rec aux acc = function (* retrieve object attributes *)
620 | Obj_class c :: tl -> aux (`Class c :: acc) tl
621 | Obj_generated :: tl -> aux (`Generated :: acc) tl
624 let obj_attrs, new_stack = aux [] ctxt.stack in
625 ctxt.stack <- new_stack;
626 set_top ctxt (Cic_attributes obj_attrs)
627 | "generated" -> set_top ctxt Obj_generated
630 (match pop_tag_attrs ctxt with
631 | ["name", name] -> Obj_field name
632 | _ -> attribute_error ())
634 let class_modifiers = pop_class_modifiers ctxt in
636 (match pop_tag_attrs ctxt with
637 | ["value", "coercion"] -> Obj_class `Coercion
638 | ["value", "elim"] ->
639 (match class_modifiers with
640 | [Cic_term (Cic.ASort (_, sort))] -> Obj_class (`Elim sort)
643 "unexpected extra content for \"elim\" object class")
644 | ["value", "record"] ->
648 | Obj_field name -> name
651 "unexpected extra content for \"record\" object class")
654 Obj_class (`Record fields)
655 | ["value", "projection"] -> Obj_class `Projection
656 | _ -> attribute_error ())
658 match find_helm_exception ctxt with
659 | Some (exn, arg) -> raise (Getter_failure (exn, arg))
660 | None -> parse_error (sprintf "unknown element \"%s\"" tag)
662 (** {2 Parser internals} *)
664 let parse uri filename =
665 let ctxt = new_parser_context uri in
666 ctxt.filename <- filename;
667 let module P = XmlPushParser in
669 P.default_callbacks with
670 P.start_element = Some (start_element ctxt);
671 P.end_element = Some (end_element ctxt);
673 let xml_parser = P.create_parser callbacks in
674 ctxt.xml_parser <- Some xml_parser;
677 P.parse xml_parser (`Gzip_file filename);
679 ctxt.xml_parser <- None;
680 (* ZACK: the above "<- None" is vital for garbage collection. Without it
681 * we keep in memory a circular structure parser -> callbacks -> ctxt ->
682 * parser. I don't know if the ocaml garbage collector is supposed to
 * collect such structures, but for sure the expat bindings will (horribly)
684 * leak when used in conjunction with such structures *)
686 ctxt.xml_parser <- None; (* ZACK: same comment as above *)
687 (* debug_print (string_of_stack stack);*)
688 (* assert (List.length ctxt.stack = 1) *)
691 | Failure "int_of_string" -> parse_error ctxt "integer number expected"
692 | Invalid_argument "bool_of_string" -> parse_error ctxt "boolean expected"
693 | P.Parse_error msg -> parse_error ctxt ("parse error: " ^ msg)
695 | Getter_failure _ as exn ->
698 raise (Parser_failure ("uncaught exception: " ^ Printexc.to_string exn))
700 (** {2 API implementation} *)
702 let annobj_of_xml uri filename filenamebody =
703 match filenamebody with
705 (match parse uri filename with
706 | Cic_constant_type (id, name, params, typ, obj_attributes) ->
707 Cic.AConstant (id, None, name, None, typ, params, obj_attributes)
709 | _ -> raise (Parser_failure ("no object found in " ^ filename)))
710 | Some filenamebody ->
711 (match parse uri filename, parse uri filenamebody with
712 | Cic_constant_type (type_id, name, params, typ, obj_attributes),
713 Cic_constant_body (body_id, _, _, body, _) ->
714 Cic.AConstant (type_id, Some body_id, name, Some body, typ, params,[])
716 raise (Parser_failure (sprintf "no constant found in %s, %s"
717 filename filenamebody)))
(** [obj_of_xml uri filename filenamebody] builds the object described by the
    given XML file(s), without annotations: it parses them exactly as
    [annobj_of_xml] does (same arguments, [filenamebody] being an optional
    second file) and then strips the annotations with
    [Deannotate.deannotate_obj]. Exceptions raised by [annobj_of_xml]
    propagate unchanged. *)
let obj_of_xml uri filename filenamebody =
  let annobj = annobj_of_xml uri filename filenamebody in
  Deannotate.deannotate_obj annobj