1 (* Copyright (C) 2004-2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
26 let debug_print = prerr_endline
30 (* ZACK TODO element from the DTD still to be handled:
31 <!ELEMENT CurrentProof (Conjecture*,body)>
32 <!ELEMENT Sequent %sequent;>
33 <!ELEMENT Conjecture %sequent;>
34 <!ELEMENT Decl %term;>
36 <!ELEMENT Hidden EMPTY>
37 <!ELEMENT Goal %term;>
40 exception Getter_failure of string * string
41 exception Parser_failure of string
44 | Arg of string * Cic.annterm (* relative uri, term *)
45 (* constants' body and types resides in differne files, thus we can't simple
46 * keep constants in Cic_obj stack entries *)
47 | Cic_attributes of Cic.attribute list
48 | Cic_constant_body of string * string * UriManager.uri list * Cic.annterm
50 (* id, for, params, body, object attributes *)
51 | Cic_constant_type of string * string * UriManager.uri list * Cic.annterm
53 (* id, name, params, type, object attributes *)
54 | Cic_term of Cic.annterm (* term *)
55 | Cic_obj of Cic.annobj (* object *)
56 | Cofix_fun of Cic.id * string * Cic.annterm * Cic.annterm
57 (* id, name, type, body *)
58 | Constructor of string * Cic.annterm (* name, type *)
59 | Decl of Cic.id * Cic.name * Cic.annterm (* id, binder, source *)
60 | Def of Cic.id * Cic.name * Cic.annterm (* id, binder, source *)
61 | Fix_fun of Cic.id * string * int * Cic.annterm * Cic.annterm
62 (* id, name, ind. index, type, body *)
63 | Inductive_type of string * string * bool * Cic.annterm *
64 (string * Cic.annterm) list (* id, name, inductive, arity, constructors *)
65 | Meta_subst of Cic.annterm option
66 | Obj_class of Cic.object_class
67 | Obj_field of string (* field name *)
69 | Tag of string * (string * string) list (* tag name, attributes *)
70 (* ZACK TODO add file position to tag stack entry so that when attribute
71 * errors occur, the position of their _start_tag_ could be printed
72 * instead of the current position (usually the end tag) *)
75 mutable stack: stack_entry list;
76 mutable xml_parser: XmlPushParser.xml_parser option;
77 mutable filename: string;
81 let string_of_stack ctxt =
82 "[" ^ (String.concat "; "
85 | Arg (reluri, _) -> sprintf "Arg %s" reluri
86 | Cic_attributes _ -> "Cic_attributes"
87 | Cic_constant_body (id, name, _, _, _) ->
88 sprintf "Cic_constant_body %s (id=%s)" name id
89 | Cic_constant_type (id, name, _, _, _) ->
90 sprintf "Cic_constant_type %s (id=%s)" name id
91 | Cic_term _ -> "Cic_term"
92 | Cic_obj _ -> "Cic_obj"
93 | Constructor (name, _) -> "Constructor " ^ name
94 | Cofix_fun (id, _, _, _) -> sprintf "Cofix_fun (id=%s)" id
95 | Decl (id, _, _) -> sprintf "Decl (id=%s)" id
96 | Def (id, _, _) -> sprintf "Def (id=%s)" id
97 | Fix_fun (id, _, _, _, _) -> sprintf "Fix_fun (id=%s)" id
98 | Inductive_type (id, name, _, _, _) ->
99 sprintf "Inductive_type %s (id=%s)" name id
100 | Meta_subst _ -> "Meta_subst"
101 | Obj_class _ -> "Obj_class"
102 | Obj_field name -> "Obj_field " ^ name
103 | Obj_generated -> "Obj_generated"
104 | Tag (tag, _) -> "Tag " ^ tag)
107 let compare_attrs (a1, v1) (a2, v2) = Pervasives.compare a1 a2
108 let sort_attrs = List.sort compare_attrs
110 let new_parser_context uri = {
117 let get_parser ctxt =
118 match ctxt.xml_parser with
120 | None -> assert false
122 (** {2 Error handling} *)
124 let parse_error ctxt msg =
125 let (line, col) = XmlPushParser.get_position (get_parser ctxt) in
126 raise (Parser_failure (sprintf "[%s: line %d, column %d] %s"
127 ctxt.filename line col msg))
129 let attribute_error ctxt tag =
130 parse_error ctxt ("wrong attribute set for " ^ tag)
132 (** {2 Parsing context management} *)
135 (* debug_print "pop";*)
136 match ctxt.stack with
137 | hd :: tl -> (ctxt.stack <- tl)
141 (* debug_print "push";*)
142 ctxt.stack <- v :: ctxt.stack
145 (* debug_print "set_top";*)
146 match ctxt.stack with
147 | _ :: tl -> (ctxt.stack <- v :: tl)
150 (** pop the last tag from the open tags stack returning a pair <tag_name,
153 match ctxt.stack with
154 | Tag (tag, attrs) :: tl ->
157 | _ -> parse_error ctxt "unexpected extra content"
159 (** pop the last tag from the open tags stack returning its attributes.
160 * Attributes are returned as a list of pair <name, value> _sorted_ by
162 let pop_tag_attrs ctxt = sort_attrs (snd (pop_tag ctxt))
165 let rec aux acc stack =
167 | Cic_term t :: tl -> aux (t :: acc) tl
170 let values, new_stack = aux [] ctxt.stack in
171 ctxt.stack <- new_stack;
174 let pop_class_modifiers ctxt =
175 let rec aux acc stack =
177 | (Cic_term (Cic.ASort _) as m) :: tl
178 | (Obj_field _ as m) :: tl ->
182 let values, new_stack = aux [] ctxt.stack in
183 ctxt.stack <- new_stack;
186 let pop_meta_substs ctxt =
187 let rec aux acc stack =
189 | Meta_subst t :: tl -> aux (t :: acc) tl
192 let values, new_stack = aux [] ctxt.stack in
193 ctxt.stack <- new_stack;
196 let pop_fix_funs ctxt =
197 let rec aux acc stack =
199 | Fix_fun (id, name, index, typ, body) :: tl ->
200 aux ((id, name, index, typ, body) :: acc) tl
203 let values, new_stack = aux [] ctxt.stack in
204 ctxt.stack <- new_stack;
207 let pop_cofix_funs ctxt =
208 let rec aux acc stack =
210 | Cofix_fun (id, name, typ, body) :: tl ->
211 aux ((id, name, typ, body) :: acc) tl
214 let values, new_stack = aux [] ctxt.stack in
215 ctxt.stack <- new_stack;
218 let pop_constructors ctxt =
219 let rec aux acc stack =
221 | Constructor (name, t) :: tl -> aux ((name, t) :: acc) tl
224 let values, new_stack = aux [] ctxt.stack in
225 ctxt.stack <- new_stack;
228 let pop_inductive_types ctxt =
229 let rec aux acc stack =
231 | Inductive_type (id, name, ind, arity, ctors) :: tl ->
232 aux ((id, name, ind, arity, ctors) :: acc) tl
235 let values, new_stack = aux [] ctxt.stack in
237 parse_error ctxt "no \"InductiveType\" element found";
238 ctxt.stack <- new_stack;
241 (** travels the stack (without popping) for the first term subject of explicit
242 * named substitution and return its URI *)
243 let find_base_uri ctxt =
244 let rec aux = function
245 | Cic_term (Cic.AConst (_, uri, _)) :: _
246 | Cic_term (Cic.AMutInd (_, uri, _, _)) :: _
247 | Cic_term (Cic.AMutConstruct (_, uri, _, _, _)) :: _
248 | Cic_term (Cic.AVar (_, uri, _)) :: _ ->
250 | Arg _ :: tl -> aux tl
251 | _ -> parse_error ctxt "no \"arg\" element found"
253 UriManager.buri_of_uri (aux ctxt.stack)
255 (** backwardly eats the stack building an explicit named substitution from Arg
257 let pop_subst ctxt base_uri =
258 let rec aux acc stack =
260 | Arg (rel_uri, term) :: tl ->
261 let uri = UriManager.uri_of_string (base_uri ^ "/" ^ rel_uri) in
262 aux ((uri, term) :: acc) tl
265 let subst, new_stack = aux [] ctxt.stack in
267 parse_error ctxt "no \"arg\" element found";
268 ctxt.stack <- new_stack;
272 match ctxt.stack with
273 | Cic_term t :: tl ->
276 | _ -> parse_error ctxt "no cic term found"
278 let pop_obj_attributes ctxt =
279 match ctxt.stack with
280 | Cic_attributes attributes :: tl ->
285 (** {2 Auxiliary functions} *)
287 let uri_of_string = UriManager.uri_of_string
289 let uri_list_of_string =
290 let space_RE = Str.regexp " " in
292 List.map uri_of_string (Str.split space_RE s)
294 let sort_of_string ctxt = function
297 | "Type" -> Cic.Type (CicUniv.fresh ~uri:ctxt.uri ())
298 (* | "Type" -> CicUniv.restart_numbering (); |+ useful only to test parser +| *)
299 | "CProp" -> Cic.CProp
300 | _ -> parse_error ctxt "sort expected"
302 let patch_subst ctxt subst = function
303 | Cic.AConst (id, uri, _) -> Cic.AConst (id, uri, subst)
304 | Cic.AMutInd (id, uri, typeno, _) ->
305 Cic.AMutInd (id, uri, typeno, subst)
306 | Cic.AMutConstruct (id, uri, typeno, consno, _) ->
307 Cic.AMutConstruct (id, uri, typeno, consno, subst)
308 | Cic.AVar (id, uri, _) -> Cic.AVar (id, uri, subst)
311 ("only \"CONST\", \"VAR\", \"MUTIND\", and \"MUTCONSTRUCT\" can be" ^
314 (** backwardly eats the stack seeking for the first open tag carrying
315 * "helm:exception" attributes. If found return Some of a pair containing
316 * exception name and argument. Return None otherwise *)
317 let find_helm_exception ctxt =
318 let rec aux = function
320 | Tag (_, attrs) :: tl ->
322 let exn = List.assoc "helm:exception" attrs in
324 try List.assoc "helm:exception_arg" attrs with Not_found -> ""
327 with Not_found -> aux tl)
332 (** {2 Push parser callbacks}
333 * each callback needs to be instantiated to a parsing context *)
335 let start_element ctxt tag attrs =
336 (* debug_print (sprintf "<%s%s>" tag (match attrs with | [] -> "" | _ -> " " ^ String.concat " " (List.map (fun (a,v) -> sprintf "%s=\"%s\"" a v) attrs)));*)
337 push ctxt (Tag (tag, attrs))
339 let end_element ctxt tag =
340 (* debug_print (sprintf "</%s>" tag);*)
341 (* debug_print (string_of_stack ctxt);*)
342 let attribute_error () = attribute_error ctxt tag in
343 let parse_error = parse_error ctxt in
344 let sort_of_string = sort_of_string ctxt in
348 (match pop_tag_attrs ctxt with
349 | ["binder", binder; "id", id; "idref", idref; "sort", _;
351 Cic.ARel (id, idref, int_of_string value, binder)
352 | _ -> attribute_error ()))
355 (match pop_tag_attrs ctxt with
356 | ["id", id; "sort", _; "uri", uri] ->
357 Cic.AVar (id, uri_of_string uri, [])
358 | _ -> attribute_error ()))
361 (match pop_tag_attrs ctxt with
362 | ["id", id; "sort", _; "uri", uri] ->
363 Cic.AConst (id, uri_of_string uri, [])
364 | _ -> attribute_error ()))
367 (match pop_tag_attrs ctxt with
368 | ["id", id; "value", sort] -> Cic.ASort (id, sort_of_string sort)
369 | _ -> attribute_error ()))
371 let args = pop_cics ctxt in
373 (match pop_tag_attrs ctxt with
374 | ["id", id; "sort", _] -> Cic.AAppl (id, args)
375 | _ -> attribute_error ()))
377 let source = pop_cic ctxt in
379 (match pop_tag_attrs ctxt with
380 | ["binder", binder; "id", id; "type", _] ->
381 Decl (id, Cic.Name binder, source)
382 | ["id", id; "type", _] -> Decl (id, Cic.Anonymous, source)
383 | _ -> attribute_error ())
384 | "def" -> (* same as "decl" above *)
385 let source = pop_cic ctxt in
387 (match pop_tag_attrs ctxt with
388 | ["binder", binder; "id", id; "sort", _] ->
389 Def (id, Cic.Name binder, source)
390 | ["id", id; "sort", _] -> Def (id, Cic.Anonymous, source)
391 | _ -> attribute_error ())
392 | "arity" (* transparent elements (i.e. which contain a CIC) *)
400 let term = pop_cic ctxt in
401 pop ctxt; (* pops start tag matching current end tag (e.g. <arity>) *)
402 push ctxt (Cic_term term)
403 | "substitution" -> (* optional transparent elements (i.e. which _may_
405 set_top ctxt (* replace <substitution> *)
406 (match ctxt.stack with
407 | Cic_term term :: tl ->
409 (Meta_subst (Some term))
410 | _ -> Meta_subst None)
412 let target = pop_cic ctxt in
413 let rec add_decl target = function
414 | Decl (id, binder, source) :: tl ->
415 add_decl (Cic.AProd (id, binder, source, target)) tl
420 let term = add_decl target ctxt.stack in
421 (match pop_tag_attrs ctxt with
423 | _ -> attribute_error ());
424 push ctxt (Cic_term term)
426 let target = pop_cic ctxt in
427 let rec add_decl target = function
428 | Decl (id, binder, source) :: tl ->
429 add_decl (Cic.ALambda (id, binder, source, target)) tl
434 let term = add_decl target ctxt.stack in
435 (match pop_tag_attrs ctxt with
437 | _ -> attribute_error ());
438 push ctxt (Cic_term term)
440 let target = pop_cic ctxt in
441 let rec add_def target = function
442 | Def (id, binder, source) :: tl ->
443 add_def (Cic.ALetIn (id, binder, source, target)) tl
448 let term = add_def target ctxt.stack in
449 (match pop_tag_attrs ctxt with
451 | _ -> attribute_error ());
452 push ctxt (Cic_term term)
454 let typ = pop_cic ctxt in
455 let term = pop_cic ctxt in
457 (match pop_tag_attrs ctxt with
458 | ["id", id; "sort", _] -> Cic.ACast (id, term, typ)
459 | _ -> attribute_error ()));
462 (match pop_tag_attrs ctxt with
463 | ["id", id] -> Cic.AImplicit (id, None)
464 | ["annotation", annotation; "id", id] ->
465 let implicit_annotation =
466 match annotation with
467 | "closed" -> `Closed
470 | _ -> parse_error "invalid value for \"annotation\" attribute"
472 Cic.AImplicit (id, Some implicit_annotation)
473 | _ -> attribute_error ()))
475 let meta_substs = pop_meta_substs ctxt in
477 (match pop_tag_attrs ctxt with
478 | ["id", id; "no", no; "sort", _] ->
479 Cic.AMeta (id, int_of_string no, meta_substs)
480 | _ -> attribute_error ()));
483 (match pop_tag_attrs ctxt with
484 | ["id", id; "noType", noType; "uri", uri] ->
485 Cic.AMutInd (id, uri_of_string uri, int_of_string noType, [])
486 | _ -> attribute_error ()));
489 (match pop_tag_attrs ctxt with
490 | ["id", id; "noConstr", noConstr; "noType", noType; "sort", _;
492 Cic.AMutConstruct (id, uri_of_string uri, int_of_string noType,
493 int_of_string noConstr, [])
494 | _ -> attribute_error ()));
496 let body = pop_cic ctxt in
497 let typ = pop_cic ctxt in
499 (match pop_tag_attrs ctxt with
500 | ["id", id; "name", name; "recIndex", recIndex] ->
501 Fix_fun (id, name, int_of_string recIndex, typ, body)
502 | _ -> attribute_error ())
504 let body = pop_cic ctxt in
505 let typ = pop_cic ctxt in
507 (match pop_tag_attrs ctxt with
508 | ["id", id; "name", name] ->
509 Cofix_fun (id, name, typ, body)
510 | _ -> attribute_error ())
512 let fix_funs = pop_fix_funs ctxt in
514 (match pop_tag_attrs ctxt with
515 | ["id", id; "noFun", noFun; "sort", _] ->
516 Cic.AFix (id, int_of_string noFun, fix_funs)
517 | _ -> attribute_error ()))
519 let cofix_funs = pop_cofix_funs ctxt in
521 (match pop_tag_attrs ctxt with
522 | ["id", id; "noFun", noFun; "sort", _] ->
523 Cic.ACoFix (id, int_of_string noFun, cofix_funs)
524 | _ -> attribute_error ()))
526 (match pop_cics ctxt with
527 | patternsType :: inductiveTerm :: patterns ->
529 (match pop_tag_attrs ctxt with
530 | ["id", id; "noType", noType; "sort", _; "uriType", uriType] ->
531 Cic.AMutCase (id, uri_of_string uriType, int_of_string noType,
532 patternsType, inductiveTerm, patterns)
533 | _ -> attribute_error ()))
534 | _ -> parse_error "invalid \"MUTCASE\" content")
536 let typ = pop_cic ctxt in
538 (match pop_tag_attrs ctxt with
539 | ["name", name] -> Constructor (name, typ)
540 | _ -> attribute_error ())
542 let constructors = pop_constructors ctxt in
543 let arity = pop_cic ctxt in
545 (match pop_tag_attrs ctxt with
546 | ["id", id; "inductive", inductive; "name", name] ->
547 Inductive_type (id, name, bool_of_string inductive, arity,
549 | _ -> attribute_error ())
550 | "InductiveDefinition" ->
551 let inductive_types = pop_inductive_types ctxt in
552 let obj_attributes = pop_obj_attributes ctxt in
554 (match pop_tag_attrs ctxt with
555 | ["id", id; "noParams", noParams; "params", params] ->
556 Cic.AInductiveDefinition (id, inductive_types,
557 uri_list_of_string params, int_of_string noParams, obj_attributes)
558 | _ -> attribute_error ()))
560 let typ = pop_cic ctxt in
561 let obj_attributes = pop_obj_attributes ctxt in
563 (match pop_tag_attrs ctxt with
564 | ["id", id; "name", name; "params", params] ->
565 Cic_constant_type (id, name, uri_list_of_string params, typ,
567 | _ -> attribute_error ())
569 let body = pop_cic ctxt in
570 let obj_attributes = pop_obj_attributes ctxt in
572 (match pop_tag_attrs ctxt with
573 | ["for", for_; "id", id; "params", params] ->
574 Cic_constant_body (id, for_, uri_list_of_string params, body,
576 | _ -> attribute_error ())
578 let typ = pop_cic ctxt in
580 match pop_cics ctxt with
583 | _ -> parse_error "wrong content for \"Variable\""
585 let obj_attributes = pop_obj_attributes ctxt in
587 (match pop_tag_attrs ctxt with
588 | ["id", id; "name", name; "params", params] ->
589 Cic.AVariable (id, name, body, typ, uri_list_of_string params,
591 | _ -> attribute_error ()))
593 let term = pop_cic ctxt in
595 (match pop_tag_attrs ctxt with
596 | ["relUri", relUri] -> Arg (relUri, term)
597 | _ -> attribute_error ())
599 (* explicit named substitution handling: when the end tag of an element
600 * subject of exlicit named subst (MUTIND, MUTCONSTRUCT, CONST, VAR) it
601 * is stored on the stack with no substitutions (i.e. []). When the end
602 * tag of an "instantiate" element is found we patch the term currently
603 * on the stack with the substitution built from "instantiate" children
605 (* XXX inefficiency here: first travels the <arg> elements in order to
606 * find the baseUri, then in order to build the explicit named subst *)
607 let base_uri = find_base_uri ctxt in
608 let subst = pop_subst ctxt base_uri in
609 let term = pop_cic ctxt in
610 (* comment from CicParser3.ml:
611 * CSC: the "id" optional attribute should be parsed and reflected in
612 * Cic.annterm and id = string_of_xml_attr (n#attribute "id") *)
613 (* replace <instantiate> *)
614 set_top ctxt (Cic_term (patch_subst ctxt subst term))
616 let rec aux acc = function (* retrieve object attributes *)
617 | Obj_class c :: tl -> aux (`Class c :: acc) tl
618 | Obj_generated :: tl -> aux (`Generated :: acc) tl
621 let obj_attrs, new_stack = aux [] ctxt.stack in
622 ctxt.stack <- new_stack;
623 set_top ctxt (Cic_attributes obj_attrs)
624 | "generated" -> set_top ctxt Obj_generated
627 (match pop_tag_attrs ctxt with
628 | ["name", name] -> Obj_field name
629 | _ -> attribute_error ())
631 let class_modifiers = pop_class_modifiers ctxt in
633 (match pop_tag_attrs ctxt with
634 | ["value", "coercion"] -> Obj_class `Coercion
635 | ["value", "elim"] ->
636 (match class_modifiers with
637 | [Cic_term (Cic.ASort (_, sort))] -> Obj_class (`Elim sort)
640 "unexpected extra content for \"elim\" object class")
641 | ["value", "record"] ->
645 | Obj_field name -> name
648 "unexpected extra content for \"record\" object class")
651 Obj_class (`Record fields)
652 | ["value", "projection"] -> Obj_class `Projection
653 | _ -> attribute_error ())
655 match find_helm_exception ctxt with
656 | Some (exn, arg) -> raise (Getter_failure (exn, arg))
657 | None -> parse_error (sprintf "unknown element \"%s\"" tag)
659 (** {2 Parser internals} *)
661 let parse uri filename =
662 let ctxt = new_parser_context uri in
663 ctxt.filename <- filename;
664 let module P = XmlPushParser in
666 P.default_callbacks with
667 P.start_element = Some (start_element ctxt);
668 P.end_element = Some (end_element ctxt);
670 let xml_parser = P.create_parser callbacks in
671 ctxt.xml_parser <- Some xml_parser;
674 P.parse xml_parser (`Gzip_file filename);
676 ctxt.xml_parser <- None;
677 (* ZACK: the above "<- None" is vital for garbage collection. Without it
678 * we keep in memory a circular structure parser -> callbacks -> ctxt ->
679 * parser. I don't know if the ocaml garbage collector is supposed to
680 * collect such structures, but for sure the expat bindings will (orribly)
681 * leak when used in conjunction with such structures *)
683 ctxt.xml_parser <- None; (* ZACK: same comment as above *)
684 (* debug_print (string_of_stack stack);*)
685 (* assert (List.length ctxt.stack = 1) *)
688 | Failure "int_of_string" -> parse_error ctxt "integer number expected"
689 | Invalid_argument "bool_of_string" -> parse_error ctxt "boolean expected"
690 | P.Parse_error msg -> parse_error ctxt ("parse error: " ^ msg)
692 | Getter_failure _ as exn ->
695 raise (Parser_failure ("uncaught exception: " ^ Printexc.to_string exn))
697 (** {2 API implementation} *)
699 let annobj_of_xml uri filename filenamebody =
700 match filenamebody with
702 (match parse uri filename with
703 | Cic_constant_type (id, name, params, typ, obj_attributes) ->
704 Cic.AConstant (id, None, name, None, typ, params, obj_attributes)
706 | _ -> raise (Parser_failure ("no object found in " ^ filename)))
707 | Some filenamebody ->
708 (match parse uri filename, parse uri filenamebody with
709 | Cic_constant_type (type_id, name, params, typ, obj_attributes),
710 Cic_constant_body (body_id, _, _, body, _) ->
711 Cic.AConstant (type_id, Some body_id, name, Some body, typ, params,[])
713 raise (Parser_failure (sprintf "no constant found in %s, %s"
714 filename filenamebody)))
716 let obj_of_xml uri filename filenamebody =
717 Deannotate.deannotate_obj (annobj_of_xml uri filename filenamebody)