2 * ----------------------------------------------------------------------
3 * PXP: The polymorphic XML parser for Objective Caml.
4 * Copyright by Gerd Stolpmann. See LICENSE for details.
23 | T_attribute of string
24 | T_namespace of string
28 class type ['node] extension =
32 method set_node : 'node -> unit
37 class type [ 'ext ] node =
39 constraint 'ext = 'ext node #extension
40 method extension : 'ext
42 method parent : 'ext node
43 method root : 'ext node
44 method orphaned_clone : 'self
45 method orphaned_flat_clone : 'self
46 method add_node : ?force:bool -> 'ext node -> unit
47 method add_pinstr : proc_instruction -> unit
48 method pinstr : string -> proc_instruction list
49 method pinstr_names : string list
50 method node_position : int
51 method node_path : int list
52 method sub_nodes : 'ext node list
53 method iter_nodes : ('ext node -> unit) -> unit
54 method iter_nodes_sibl :
55 ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
56 method nth_node : int -> 'ext node
57 method previous_node : 'ext node
58 method next_node : 'ext node
59 method set_nodes : 'ext node list -> unit
61 method node_type : node_type
62 method position : (string * int * int)
63 method attribute : string -> att_value
64 method attribute_names : string list
65 method attribute_type : string -> att_type
66 method attributes : (string * Pxp_types.att_value) list
67 method required_string_attribute : string -> string
68 method required_list_attribute : string -> string list
69 method optional_string_attribute : string -> string option
70 method optional_list_attribute : string -> string list
71 method id_attribute_name : string
72 method id_attribute_value : string
73 method idref_attribute_names : string list
74 method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
75 method attributes_as_nodes : 'ext node list
76 method set_comment : string option -> unit
77 method comment : string option
79 method encoding : rep_encoding
80 method create_element :
81 ?position:(string * int * int) ->
82 dtd -> node_type -> (string * string) list -> 'ext node
83 method create_data : dtd -> string -> 'ext node
84 method local_validate : ?use_dfa:bool -> unit -> unit
85 method keep_always_whitespace_mode : unit
86 method write : output_stream -> encoding -> unit
87 method write_compact_as_latin1 : output_stream -> unit
88 method internal_adopt : 'ext node option -> int -> unit
89 method internal_set_pos : int -> unit
90 method internal_delete : 'ext node -> unit
91 method internal_init : (string * int * int) ->
92 dtd -> string -> (string * string) list -> unit
93 method internal_init_other : (string * int * int) ->
94 dtd -> node_type -> unit
98 type 'ext spec_table =
99 { mapping : (string, 'ext node) Hashtbl.t;
100 data_node : 'ext node;
101 default_element : 'ext node;
102 super_root_node : 'ext node option;
103 pinstr_mapping : (string, 'ext node) Hashtbl.t;
104 default_pinstr_node : 'ext node option;
105 comment_node : 'ext node option;
110 Spec_table of 'ext spec_table
114 let make_spec_from_mapping
117 ?default_pinstr_exemplar
119 ~data_exemplar ~default_element_exemplar ~element_mapping () =
121 { mapping = element_mapping;
122 data_node = data_exemplar;
123 default_element = default_element_exemplar;
124 super_root_node = super_root_exemplar;
125 comment_node = comment_exemplar;
126 default_pinstr_node = default_pinstr_exemplar;
128 (match pinstr_mapping with
129 None -> Hashtbl.create 1
136 let make_spec_from_alist
139 ?default_pinstr_exemplar
141 ~data_exemplar ~default_element_exemplar ~element_alist () =
142 let m = List.length pinstr_alist in
143 let pinstr_mapping = Hashtbl.create m in
145 (fun (name,ex) -> Hashtbl.add pinstr_mapping name ex)
147 let n = List.length element_alist in
148 let element_mapping = Hashtbl.create m in
150 (fun (name,ex) -> Hashtbl.add element_mapping name ex)
152 make_spec_from_mapping
153 ?super_root_exemplar: super_root_exemplar
154 ?comment_exemplar: comment_exemplar
155 ?default_pinstr_exemplar: default_pinstr_exemplar
156 ~pinstr_mapping: pinstr_mapping
157 ~data_exemplar: data_exemplar
158 ~default_element_exemplar: default_element_exemplar
159 ~element_mapping: element_mapping
163 (**********************************************************************)
167 let validate_content ?(use_dfa=None) model (el : 'a node) =
168 (* checks that the nodes of 'el' matches the DTD. Returns 'true'
169 * on success and 'false' on failure.
172 let rec is_empty cl =
173 (* Whether the node list counts as empty or not. *)
177 ( match n # node_type with
178 | T_element _ -> false
179 | _ -> is_empty cl' (* ignore other nodes *)
183 let rec run_regexp cl ml =
184 (* Validates regexp content models ml against instances cl. This
185 * function works for deterministic and non-determninistic models.
186 * The implementation uses backtracking and may sometimes be slow.
188 * cl: the list of children that will have to be matched
189 * ml: the list of regexps that will have to match (to be read as
191 * returns () meaning that no match has been found, or raises Found.
195 if cl = [] then raise Found; (* Frequent case *)
196 if is_empty cl then raise Found; (* General condition *)
198 assert (seq <> []); (* necessary to ensure termination *)
199 run_regexp cl (seq @ ml')
205 run_regexp cl (alt :: ml');
208 assert (alts <> []); (* Alt [] matches nothing *)
210 | Repeated re :: ml' ->
211 let rec norm re = (* to avoid infinite loops *)
213 Repeated subre -> norm subre (* necessary *)
214 | Optional subre -> norm subre (* necessary *)
215 | Repeated1 subre -> norm subre (* an optimization *)
219 run_regexp cl (re' :: Repeated re' :: ml');
221 | Repeated1 re :: ml' ->
222 run_regexp cl (re :: Repeated re :: ml')
223 | Optional re :: ml' ->
224 run_regexp cl (re :: ml');
226 | Child chld :: ml' ->
231 begin match sub_el # node_type with
232 T_data -> (* Ignore data *)
234 (* Note: It can happen that we find a data node here
235 * if the 'keep_always_whitespace' mode is turned on.
238 if nt = chld then run_regexp cl' ml'
239 | _ -> (* Ignore this element *)
245 (* Validates regexp content models ml against instances cl. This
246 * function works ONLY for deterministic models.
247 * The implementation executes the automaton.
249 let current_vertex = ref dfa.dfa_start in
250 let rec next_step cl =
253 begin match el # node_type with
254 T_data -> (* Ignore data *)
256 (* Note: It can happen that we find a data node here
257 * if the 'keep_always_whitespace' mode is turned on.
261 current_vertex := Graph.follow_edge !current_vertex nt;
266 | _ -> (* Ignore this node *)
270 VertexSet.mem !current_vertex dfa.dfa_stops
279 let cl = el # sub_nodes in
281 | Mixed (MPCDATA :: mix) ->
282 let mix' = List.map (function
283 MPCDATA -> assert false
289 let nt = sub_el # node_type in
292 if not (List.mem name mix') then raise Not_found;
301 let cl = el # sub_nodes in
302 begin match use_dfa with
304 (* General backtracking implementation: *)
318 (**********************************************************************)
321 class virtual ['ext] node_impl an_ext =
323 constraint 'ext = 'ext node #extension
325 val mutable parent = (None : 'ext node option)
326 val mutable node_position = -1
327 val mutable dtd = (None : dtd option)
328 val mutable extension = an_ext
331 extension # set_node (self : 'ext #node :> 'ext node)
334 method extension = (extension : 'ext)
339 | Some p -> p # internal_delete (self : 'ext #node :> 'ext node)
343 None -> raise Not_found
348 None -> (self : 'ext #node :> 'ext node)
351 method node_position =
352 if node_position >= 0 then node_position else
356 let rec collect n path =
358 let p = n # node_position in
359 collect (n # parent) (p :: path)
365 collect (self : 'ext #node :> 'ext node) []
367 method previous_node =
368 self # parent # nth_node (self # node_position - 1)
371 self # parent # nth_node (self # node_position + 1)
373 method orphaned_clone =
374 let x = extension # clone in
380 x # set_node (n : 'ext #node :> 'ext node);
383 method orphaned_flat_clone =
384 let x = extension # clone in
390 x # set_node (n : 'ext #node :> 'ext node);
395 None -> failwith "Pxp_document.node_impl#dtd: No DTD available"
400 None -> failwith "Pxp_document.node_impl#encoding: No DTD available"
401 | Some d -> d # encoding
403 method internal_adopt (new_parent : 'ext node option) pos =
404 begin match parent with
407 if new_parent <> None then
408 failwith "Pxp_document.node_impl#internal_adopt: Tried to add a bound element"
410 parent <- new_parent;
413 method internal_set_pos pos =
416 method virtual add_node : ?force:bool -> 'ext node -> unit
417 method virtual add_pinstr : proc_instruction -> unit
418 method virtual sub_nodes : 'ext node list
419 method virtual pinstr : string -> proc_instruction list
420 method virtual pinstr_names : string list
421 method virtual iter_nodes : ('ext node -> unit) -> unit
422 method virtual iter_nodes_sibl : ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
423 method virtual nth_node : int -> 'ext node
424 method virtual set_nodes : 'ext node list -> unit
425 method virtual data : string
426 method virtual node_type : node_type
427 method virtual position : (string * int * int)
428 method virtual attribute : string -> att_value
429 method virtual attribute_names : string list
430 method virtual attribute_type : string -> att_type
431 method virtual attributes : (string * Pxp_types.att_value) list
432 method virtual required_string_attribute : string -> string
433 method virtual required_list_attribute : string -> string list
434 method virtual optional_string_attribute : string -> string option
435 method virtual optional_list_attribute : string -> string list
436 method virtual quick_set_attributes : (string * Pxp_types.att_value) list -> unit
437 method virtual attributes_as_nodes : 'ext node list
438 method virtual set_comment : string option -> unit
439 method virtual comment : string option
440 method virtual create_element :
441 ?position:(string * int * int) ->
442 dtd -> node_type -> (string * string) list -> 'ext node
443 method virtual create_data : dtd -> string -> 'ext node
444 method virtual keep_always_whitespace_mode : unit
445 method virtual write : output_stream -> encoding -> unit
446 method virtual write_compact_as_latin1 : output_stream -> unit
447 method virtual local_validate : ?use_dfa:bool -> unit -> unit
448 method virtual internal_delete : 'ext node -> unit
449 method virtual internal_init : (string * int * int) ->
450 dtd -> string -> (string * string) list -> unit
451 method virtual internal_init_other : (string * int * int) ->
452 dtd -> node_type -> unit
457 (**********************************************************************)
459 let no_position = ("?", 0, 0) ;;
462 class ['ext] data_impl an_ext : ['ext] node =
464 inherit ['ext] node_impl an_ext
465 val mutable content = ("" : string)
467 method position = no_position
469 method add_node ?(force=false) _ =
470 failwith "method 'add_node' not applicable to data node"
471 method add_pinstr _ =
472 failwith "method 'add_pinstr' not applicable to data node"
474 method pinstr_names = []
475 method sub_nodes = []
476 method iter_nodes _ = ()
477 method iter_nodes_sibl _ = ()
478 method nth_node _ = raise Not_found
480 failwith "method 'set_nodes' not applicable to data node"
481 method data = content
482 method node_type = T_data
483 method attribute _ = raise Not_found
484 method attribute_names = []
485 method attribute_type _ = raise Not_found
486 method attributes = []
487 method required_string_attribute _ =
488 failwith "Markup.document, method required_string_attribute: not found"
489 method required_list_attribute _ =
490 failwith "Markup.document, method required_list_attribute: not found"
491 method optional_string_attribute _ = None
492 method optional_list_attribute _ = []
493 method id_attribute_name = raise Not_found
494 method id_attribute_value = raise Not_found
495 method idref_attribute_names = []
496 method quick_set_attributes _ =
497 failwith "method 'quick_set_attributes' not applicable to data node"
498 method attributes_as_nodes = []
499 method comment = None
500 method set_comment c =
503 | Some _ -> failwith "method 'set_comment' not applicable to data node"
504 method create_element ?position _ _ _ =
505 failwith "method 'create_element' not applicable to data node"
506 method create_data new_dtd new_str =
507 let x = extension # clone in
514 : 'ext #node :> 'ext node) in
517 method local_validate ?use_dfa () = ()
518 method keep_always_whitespace_mode = ()
521 method write os enc =
522 let encoding = self # encoding in
523 write_data_string ~from_enc:encoding ~to_enc:enc os content
526 method write_compact_as_latin1 os =
527 self # write os `Enc_iso88591
529 method internal_delete _ =
531 method internal_init _ _ _ _ =
533 method internal_init_other _ _ _ =
539 (**********************************************************************)
541 class ['ext] attribute_impl ~element ~name value dtd =
543 val mutable parent = (None : 'ext node option)
544 val mutable dtd = dtd
545 val mutable element_name = element
546 val mutable att_name = name
547 val mutable att_value = value
551 None -> raise Not_found
556 None -> (self : 'ext #node :> 'ext node)
559 method internal_adopt new_parent _ =
562 method orphaned_clone =
565 method orphaned_flat_clone =
570 method encoding = dtd # encoding
572 method node_type = T_attribute att_name
575 if n = att_name then att_value else raise Not_found
577 method attribute_names = [ att_name ]
579 method attribute_type n =
580 let eltype = dtd # element element_name in
582 let atype, adefault = eltype # attribute n in
589 method attributes = [ att_name, att_value ]
591 method required_string_attribute n =
595 | Valuelist l -> String.concat " " l
596 | Implied_value -> raise Not_found
598 failwith "Pxp_document.attribute_impl#required_string_attribute: not found"
601 method required_list_attribute n =
606 | Implied_value -> raise Not_found
608 failwith "Pxp_document.attribute_impl#required_list_attribute: not found"
610 method optional_string_attribute n =
614 | Valuelist l -> Some(String.concat " " l)
615 | Implied_value -> None
619 method optional_list_attribute n =
624 | Implied_value -> []
628 (* Senseless methods: *)
630 method sub_nodes = []
632 method pinstr_names = []
633 method iter_nodes _ = ()
634 method iter_nodes_sibl _ = ()
635 method nth_node _ = raise Not_found
637 method position = ("?",0,0)
638 method comment = None
639 method local_validate ?use_dfa () = ()
641 (* Non-applicable methods: *)
644 failwith "Pxp_document.attribute_impl#extension: not applicable"
646 failwith "Pxp_document.attribute_impl#delete: not applicable"
647 method node_position =
648 failwith "Pxp_document.attribute_impl#node_position: not applicable"
650 failwith "Pxp_document.attribute_impl#node_path: not applicable"
651 method previous_node =
652 failwith "Pxp_document.attribute_impl#previous_node: not applicable"
654 failwith "Pxp_document.attribute_impl#next_node: not applicable"
655 method internal_set_pos _ =
656 failwith "Pxp_document.attribute_impl#internal_set_pos: not applicable"
657 method internal_delete _ =
658 failwith "Pxp_document.attribute_impl#internal_delete: not applicable"
659 method internal_init _ _ _ _ =
660 failwith "Pxp_document.attribute_impl#internal_init: not applicable"
661 method internal_init_other _ _ _ =
662 failwith "Pxp_document.attribute_impl#internal_init_other: not applicable"
663 method add_node ?force _ =
664 failwith "Pxp_document.attribute_impl#add_node: not applicable"
665 method add_pinstr _ =
666 failwith "Pxp_document.attribute_impl#add_pinstr: not applicable"
668 failwith "Pxp_document.attribute_impl#set_nodes: not applicable"
669 method quick_set_attributes _ =
670 failwith "Pxp_document.attribute_impl#quick_set_attributes: not applicable"
671 method attributes_as_nodes =
672 failwith "Pxp_document.attribute_impl#dattributes_as_nodes: not applicable"
673 method set_comment c =
675 failwith "Pxp_document.attribute_impl#set_comment: not applicable"
676 method create_element ?position _ _ _ =
677 failwith "Pxp_document.attribute_impl#create_element: not applicable"
678 method create_data _ _ =
679 failwith "Pxp_document.attribute_impl#create_data: not applicable"
680 method keep_always_whitespace_mode =
681 failwith "Pxp_document.attribute_impl#keep_always_whitespace_mode: not applicable"
683 failwith "Pxp_document.attribute_impl#write: not applicable"
684 method write_compact_as_latin1 _ =
685 failwith "Pxp_document.attribute_impl#write_compact_as_latin1: not applicable"
686 method id_attribute_name =
687 failwith "Pxp_document.attribute_impl#id_attribute_name: not applicable"
688 method id_attribute_value =
689 failwith "Pxp_document.attribute_impl#id_attribute_value: not applicable"
690 method idref_attribute_names =
691 failwith "Pxp_document.attribute_impl#idref_attribute_names: not applicable"
696 (**********************************************************************)
698 class ['ext] element_impl an_ext : ['ext] node =
700 inherit ['ext] node_impl an_ext as super
702 val mutable content_model = Any
703 val mutable content_dfa = lazy None
704 val mutable ext_decl = false
705 val mutable ntype = T_none
706 val mutable id_att_name = None
707 val mutable idref_att_names = []
708 val mutable rev_nodes = ([] : 'c list)
709 val mutable nodes = (None : 'c list option)
710 val mutable array = (None : 'c array option)
712 val mutable attributes = []
713 val mutable att_nodes = []
714 val mutable comment = None
715 val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
716 val mutable keep_always_whitespace = false
718 val mutable position = no_position
720 method comment = comment
722 method set_comment c =
723 if ntype = T_comment then
726 failwith "set_comment: not applicable to node types other than T_comment"
728 method attributes = attributes
730 method position = position
732 method private error_name =
734 T_element n -> "Element `" ^ n ^ "'"
735 | T_super_root -> "Super root"
736 | T_pinstr n -> "Wrapper element for processing instruction `" ^ n ^
738 | T_comment -> "Wrapper element for comment"
739 | T_none -> "NO element"
740 | T_attribute _ -> assert false
741 | T_namespace _ -> assert false
742 | T_data -> assert false
744 method add_node ?(force = false) n =
745 let only_whitespace s =
746 (* Checks that the string "s" contains only whitespace. On failure,
747 * Validation_error is raised.
749 let l = String.length s in
750 if l < 100 then begin
751 for i=0 to l - 1 do (* for loop is faster for small 'l' *)
753 ('\009'|'\010'|'\013'|'\032') -> ()
755 raise(Validation_error(self # error_name ^
756 " must not have character contents"));
760 let lexbuf = Lexing.from_string s in
761 let lexerset = Pxp_lexers.get_lexer_set (self # dtd # encoding) in
762 let t = lexerset.scan_name_string lexbuf in
764 (lexerset.scan_name_string lexbuf <> Eof)
766 raise(Validation_error(self # error_name ^
767 " must not have character contents"));
771 (* general DTD check: *)
774 | Some d -> if n # dtd != d then
775 failwith "Pxp_document.element_impl # add_node: the sub node has a different DTD";
777 (* specific checks: *)
779 begin match n # node_type with
781 begin match content_model with
785 if not force then begin
786 if n # data <> "" then
787 raise(Validation_error(self # error_name ^
793 if not force then begin
794 only_whitespace (n # data);
795 (* TODO: following check faster *)
796 if n # dtd # standalone_declaration &&
799 (* The standalone declaration is violated if the
800 * element declaration is contained in an external
807 " violates standalone declaration" ^
808 " because extra white space separates" ^
809 " the sub elements"));
811 if not keep_always_whitespace then raise Skip
817 (* all OK, so add this node: *)
818 n # internal_adopt (Some (self : 'ext #node :> 'ext node)) size;
819 rev_nodes <- n :: rev_nodes;
826 method add_pinstr pi =
830 if pi # encoding <> d # encoding then
831 failwith "Pxp_document.element_impl # add_pinstr: Inconsistent encodings";
833 let name = pi # target in
834 Hashtbl.add (Lazy.force pinstr) name pi
837 Hashtbl.find_all (Lazy.force pinstr) name
839 method pinstr_names =
842 (fun n _ -> l := n :: !l)
849 let cl = List.rev rev_nodes in
855 method iter_nodes f =
856 let cl = self # sub_nodes in
859 method iter_nodes_sibl f =
860 let cl = self # sub_nodes in
861 let rec next last_node l =
867 f last_node x (Some y);
873 if p < 0 or p >= size then raise Not_found;
875 array <- Some (Array.of_list (self # sub_nodes));
881 method set_nodes nl =
882 let old_size = size in
884 (fun n -> n # internal_adopt None (-1))
889 (fun n -> n # internal_adopt
890 (Some (self : 'ext #node :> 'ext node))
896 (* revert action as much as possible *)
898 (fun n -> n # internal_adopt None (-1))
901 let pos = ref (size-1) in
903 (fun n -> n # internal_adopt
904 (Some (self : 'ext #node :> 'ext node))
909 (* [TODO] Note: there may be bad members in nl *)
912 rev_nodes <- List.rev nl;
917 method orphaned_clone : 'self =
925 let x = extension # clone in
930 rev_nodes = sub_clones;
935 let pos = ref (size - 1) in
937 (fun m -> m # internal_adopt
938 (Some (n : 'ext #node :> 'ext node))
944 x # set_node (n : 'ext #node :> 'ext node);
947 method orphaned_flat_clone : 'self =
948 let x = extension # clone in
959 x # set_node (n : 'ext #node :> 'ext node);
963 method internal_delete n =
964 rev_nodes <- List.filter (fun n' -> n' != n) rev_nodes;
966 let p = ref (size-1) in
968 (fun n' -> n' # internal_set_pos !p; decr p)
971 n # internal_adopt None (-1);
975 let cl = self # sub_nodes in
976 String.concat "" (List.map (fun n -> n # data) cl)
978 method node_type = ntype
982 List.assoc n attributes
984 method attribute_names =
985 List.map fst attributes
987 method attribute_type n =
994 let eltype = d # element name in
996 let atype, adefault = eltype # attribute n in
1003 failwith "attribute_type: not available for non-element nodes"
1006 method required_string_attribute n =
1008 match List.assoc n attributes with
1010 | Valuelist l -> String.concat " " l
1011 | Implied_value -> raise Not_found
1014 failwith "Pxp_document, method required_string_attribute: not found"
1016 method optional_string_attribute n =
1018 match List.assoc n attributes with
1020 | Valuelist l -> Some (String.concat " " l)
1021 | Implied_value -> None
1026 method required_list_attribute n =
1028 match List.assoc n attributes with
1031 | Implied_value -> raise Not_found
1034 failwith "Markup.document, method required_list_attribute: not found"
1036 method optional_list_attribute n =
1038 match List.assoc n attributes with
1041 | Implied_value -> []
1046 method id_attribute_name =
1047 match id_att_name with
1048 None -> raise Not_found
1051 method id_attribute_value =
1052 match id_att_name with
1053 None -> raise Not_found
1055 begin match List.assoc name attributes (* may raise Not_found *)
1058 | _ -> raise Not_found
1062 method idref_attribute_names = idref_att_names
1065 method quick_set_attributes atts =
1071 failwith "quick_set_attributes: not applicable for non-element node"
1074 method attributes_as_nodes =
1075 match att_nodes with
1076 [] when attributes = [] ->
1079 let dtd = self # dtd in
1089 ~element:element_name
1100 method create_element
1101 ?(position = no_position) new_dtd new_type new_attlist =
1102 let x = extension # clone in
1103 let obj = ( {< parent = None;
1105 pinstr = lazy (Hashtbl.create 10)
1107 : 'ext #node :> 'ext node
1112 failwith "create_element: Cannot create T_data node"
1114 obj # internal_init position new_dtd name new_attlist;
1116 | (T_comment | T_pinstr _ | T_super_root | T_none) ->
1117 obj # internal_init_other position new_dtd new_type;
1120 failwith "create_element: Cannot create such node"
1123 method internal_init_other new_pos new_dtd new_ntype =
1124 (* resets the contents of the object *)
1129 position <- new_pos;
1130 content_model <- Any;
1131 content_dfa <- lazy None;
1134 dtd <- Some new_dtd;
1136 id_att_name <- None;
1137 idref_att_names <- [];
1141 method internal_init new_pos new_dtd new_name new_attlist =
1142 (* ONLY FOR T_Element NODES!!! *)
1143 (* resets the contents of the object *)
1147 ntype <- T_element new_name;
1148 position <- new_pos;
1152 let lexerset = Pxp_lexers.get_lexer_set (new_dtd # encoding) in
1153 let sadecl = new_dtd # standalone_declaration in
1155 (* First validate the element name and the attributes: *)
1156 (* Well-Formedness Constraint: Unique Att Spec *)
1157 let rec check_uniqueness al =
1161 if List.mem_assoc n al' then
1162 raise (WF_error("Attribute `" ^ n ^ "' occurs twice in element `" ^ new_name ^ "'"));
1163 check_uniqueness al'
1165 check_uniqueness new_attlist;
1166 (* Validity Constraint: Element Valid [element has been declared] *)
1168 let eltype = new_dtd # element new_name in
1169 content_model <- eltype # content_model;
1170 content_dfa <- lazy(eltype # content_dfa);
1171 ext_decl <- eltype # externally_declared;
1172 id_att_name <- eltype # id_attribute_name;
1173 idref_att_names <- eltype # idref_attribute_names;
1174 (* Validity Constraint: Attribute Value Type *)
1175 (* Validity Constraint: Fixed Attribute Default *)
1176 (* Validity Constraint: Standalone Document Declaration (partly) *)
1177 let undeclared_attlist = ref [] in
1182 (* Get type, default, and the normalized attribute
1185 let atype, adefault = eltype # attribute n in
1186 let av = value_of_attribute lexerset new_dtd n atype v in
1187 (* If necessary, check whether normalization violates
1188 * the standalone declaration.
1192 attribute_violates_standalone_declaration n (Some v)
1196 ("Attribute `" ^ n ^ "' of element type `" ^
1197 new_name ^ "' violates standalone declaration"));
1198 (* If the default is "fixed", check that. *)
1199 begin match adefault with
1200 (D_required | D_implied) -> ()
1203 let uv = value_of_attribute
1204 lexerset new_dtd "[default]" atype u in
1208 ("Attribute `" ^ n ^
1209 "' is fixed, but has here a different value"));
1214 (* raised by method "# attribute" *)
1215 undeclared_attlist :=
1216 (n, value_of_attribute lexerset new_dtd n A_cdata v) ::
1217 !undeclared_attlist;
1218 n, Implied_value (* does not matter *)
1221 (* Validity Constraint: Required Attribute *)
1222 (* Validity Constraint: Standalone Document Declaration (partly) *)
1223 (* Add attributes with default values *)
1228 n, List.assoc n new_attlist'
1231 (* Check standalone declaration: *)
1234 attribute_violates_standalone_declaration
1238 ("Attribute `" ^ n ^ "' of element type `" ^
1239 new_name ^ "' violates standalone declaration"));
1240 (* add default value or Implied *)
1241 let atype, adefault = eltype # attribute n in
1244 raise(Validation_error("Required attribute `" ^ n ^ "' is missing"))
1248 n, value_of_attribute lexerset new_dtd n atype v
1250 n, value_of_attribute lexerset new_dtd n atype v
1252 (eltype # attribute_names)
1254 dtd <- Some new_dtd;
1255 attributes <- new_attlist'' @ !undeclared_attlist;
1258 (* The DTD allows arbitrary attributes/contents for this
1261 dtd <- Some new_dtd;
1262 attributes <- List.map (fun (n,v) -> n, Value v) new_attlist;
1263 content_model <- Any;
1264 content_dfa <- lazy None;
1266 method local_validate ?(use_dfa=false) () =
1267 (* validates that the content of this element matches the model *)
1268 let dfa = if use_dfa then Lazy.force content_dfa else None in
1269 if not (validate_content
1272 (self : 'ext #node :> 'ext node)) then
1273 raise(Validation_error(self # error_name ^
1274 " does not match its content model"))
1277 method create_data _ _ =
1278 failwith "method 'create_data' not applicable to element node"
1280 method keep_always_whitespace_mode =
1281 keep_always_whitespace <- true
1283 method write os enc =
1284 let encoding = self # encoding in
1286 write_markup_string ~from_enc:encoding ~to_enc:enc os in
1288 begin match ntype with
1292 (fun (aname, avalue) ->
1296 wms ("\n" ^ aname ^ "=\"");
1297 write_data_string ~from_enc:encoding ~to_enc:enc os v;
1300 let v = String.concat " " l in
1301 wms ("\n" ^ aname ^ "=\"");
1302 write_data_string ~from_enc:encoding ~to_enc:enc os v;
1315 (Lazy.force pinstr);
1317 (fun n -> n # write os enc)
1320 begin match ntype with
1322 wms ("</" ^ name ^ "\n>");
1327 (* TODO: How to write comments? The comment string may contain
1328 * illegal characters or "--".
1332 method write_compact_as_latin1 os =
1333 self # write os `Enc_iso88591
1339 let spec_table_find_exemplar tab eltype =
1341 Hashtbl.find tab.mapping eltype
1343 Not_found -> tab.default_element
1347 let create_data_node spec dtd str =
1350 let exemplar = tab.data_node in
1351 exemplar # create_data dtd str
1355 let create_element_node ?position spec dtd eltype atts =
1358 let exemplar = spec_table_find_exemplar tab eltype in
1359 exemplar # create_element ?position:position dtd (T_element eltype) atts
1363 let create_super_root_node ?position spec dtd =
1366 ( match tab.super_root_node with
1368 failwith "Pxp_document.create_super_root_node: No exemplar"
1370 x # create_element ?position:position dtd T_super_root []
1374 let create_no_node ?position spec dtd =
1377 let x = tab.default_element in
1378 x # create_element ?position:position dtd T_none []
1382 let create_comment_node ?position spec dtd text =
1385 ( match tab.comment_node with
1387 failwith "Pxp_document.create_comment_node: No exemplar"
1389 let e = x # create_element ?position:position dtd T_comment []
1391 e # set_comment (Some text);
1397 let create_pinstr_node ?position spec dtd pi =
1398 let target = pi # target in
1403 Hashtbl.find tab.pinstr_mapping target
1406 ( match tab.default_pinstr_node with
1409 "Pxp_document.create_pinstr_node: No exemplar"
1415 exemplar # create_element ?position:position dtd (T_pinstr target) [] in
1421 let find ?(deeply=false) f base =
1422 let rec search_flat children =
1424 [] -> raise Not_found
1426 if f n then n else search_flat children'
1428 let rec search_deep children =
1430 [] -> raise Not_found
1435 try search_deep (n # sub_nodes)
1436 with Not_found -> search_deep children'
1438 (if deeply then search_deep else search_flat)
1443 let find_all ?(deeply=false) f base =
1444 let rec search_flat children =
1448 if f n then n :: search_flat children' else search_flat children'
1450 let rec search_deep children =
1455 search_deep (n # sub_nodes) @ search_deep children' in
1461 (if deeply then search_deep else search_flat)
1466 let find_element ?deeply eltype base =
1470 match n # node_type with
1471 T_element name -> name = eltype
1477 let find_all_elements ?deeply eltype base =
1481 match n # node_type with
1482 T_element name -> name = eltype
1490 let map_tree ~pre ?(post=(fun x -> x)) base =
1494 if n' # node_type <> T_data then begin
1495 let children = n # sub_nodes in
1496 let children' = map_children children in
1497 n' # set_nodes children';
1501 Skip -> raise Not_found
1503 and map_children l =
1508 let child' = map_rec child in
1509 child' :: map_children l'
1519 let map_tree_sibl ~pre ?(post=(fun _ x _ -> x)) base =
1520 let rec map_rec l n r =
1522 let n' = pre l n r in
1523 if n' # node_type <> T_data then begin
1524 let children = n # sub_nodes in
1525 let children' = map_children None children in
1526 let children'' = postprocess_children None children' in
1527 n' # set_nodes children'';
1531 Skip -> raise Not_found
1533 and map_children predecessor l =
1540 | x :: _ -> Some x in
1542 let child' = map_rec predecessor child successor in
1543 child' :: map_children (Some child) l'
1546 map_children (Some child) l'
1549 and postprocess_children predecessor l =
1556 | x :: _ -> Some x in
1558 let child' = post predecessor child successor in
1559 child' :: postprocess_children (Some child) l'
1562 postprocess_children (Some child) l'
1566 let base' = map_rec None base None in
1567 try post None base' None with Skip -> raise Not_found
1571 let iter_tree ?(pre=(fun x -> ())) ?(post=(fun x -> ())) base =
1572 let rec iter_rec n =
1575 let children = n # sub_nodes in
1576 iter_children children;
1579 Skip -> raise Not_found
1581 and iter_children l =
1597 let iter_tree_sibl ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) base =
1598 let rec iter_rec l n r =
1601 let children = n # sub_nodes in
1602 iter_children None children;
1605 Skip -> raise Not_found
1607 and iter_children predecessor l =
1614 | x :: _ -> Some x in
1616 iter_rec predecessor child successor;
1617 iter_children (Some child) l'
1620 iter_children (Some child) l'
1624 iter_rec None base None
1634 | x::p1', y::p2' -> if x = y then cmp p1' p2' else x - y
1637 let a_path = a # node_path in
1638 let b_path = b # node_path in
1644 type 'ext ord_index = ('ext node, int) Hashtbl.t;;
1646 let create_ord_index base =
1648 iter_tree ~pre:(fun _ -> incr n) base;
1649 let idx = Hashtbl.create !n in
1651 iter_tree ~pre:(fun node -> Hashtbl.add idx node !k; incr k) base;
1656 let ord_number idx node =
1657 Hashtbl.find idx node
1660 let ord_compare idx a b =
1661 let ord_a = Hashtbl.find idx a in
1662 let ord_b = Hashtbl.find idx b in
1666 class ['ext] document the_warner =
1668 val mutable xml_version = "1.0"
1669 val mutable dtd = (None : dtd option)
1670 val mutable root = (None : 'ext node option)
1672 val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
1673 val warner = (the_warner : collect_warnings)
1675 method init_xml_version s =
1677 warner # warn ("XML version '" ^ s ^ "' not supported");
1680 method init_root r =
1681 let dtd_r = r # dtd in
1682 match r # node_type with
1684 (**************** CASE: We have a super root element ***************)
1687 if not (dtd_r # arbitrary_allowed) then begin
1688 match dtd_r # root with
1689 Some declared_root_element_name ->
1690 let real_root_element =
1694 match r' # node_type with
1695 | T_element _ -> true
1700 failwith "Pxp_document.document#init_root: Super root does not contain root element"
1701 (* TODO: Check also that there is at most one
1702 * element in the super root node
1706 let real_root_element_name =
1707 match real_root_element # node_type with
1708 T_element name -> name
1711 if real_root_element_name <> declared_root_element_name then
1713 (Validation_error ("The root element is `" ^
1714 real_root_element_name ^
1715 "' but is declared as `" ^
1716 declared_root_element_name))
1719 (* All is okay, so store dtd and root node: *)
1723 (**************** CASE: No super root element **********************)
1725 | T_element root_element_name ->
1726 if not (dtd_r # arbitrary_allowed) then begin
1727 match dtd_r # root with
1728 Some declared_root_element_name ->
1729 if root_element_name <> declared_root_element_name then
1731 (Validation_error ("The root element is `" ^
1733 "' but is declared as `" ^
1734 declared_root_element_name))
1736 (* This may happen if you initialize your DTD yourself.
1737 * The value 'None' means that the method 'set_root' was
1738 * never called for the DTD; we interpret it here as:
1739 * The root element does not matter.
1743 (* All is okay, so store dtd and root node: *)
1748 failwith "Pxp_document.document#init_root: the root node must be an element or super-root"
1750 method xml_version = xml_version
1752 method xml_standalone =
1755 | Some d -> d # standalone_declaration
1759 None -> failwith "Pxp_document.document#dtd: Document has no DTD"
1764 None -> failwith "Pxp_document.document#encoding: Document has no DTD"
1765 | Some d -> d # encoding
1769 None -> failwith "Pxp_document.document#root: Document has no root element"
1772 method add_pinstr pi =
1773 begin match dtd with
1776 if pi # encoding <> d # encoding then
1777 failwith "Pxp_document.document # add_pinstr: Inconsistent encodings";
1779 let name = pi # target in
1780 Hashtbl.add (Lazy.force pinstr) name pi
1782 method pinstr name =
1783 Hashtbl.find_all (Lazy.force pinstr) name
1785 method pinstr_names =
1788 (fun n _ -> l := n :: !l)
1789 (Lazy.force pinstr);
1792 method write os enc =
1793 let encoding = self # encoding in
1795 write_markup_string ~from_enc:encoding ~to_enc:enc os in
1797 let r = self # root in
1798 wms ("<?xml version='1.0' encoding='" ^
1799 Netconversion.string_of_encoding enc ^
1801 ( match self # dtd # root with
1803 self # dtd # write os enc false
1805 self # dtd # write os enc true
1811 (Lazy.force pinstr);
1815 method write_compact_as_latin1 os =
1816 self # write os `Enc_iso88591
1822 (* ======================================================================
1826 * Revision 1.1 2000/11/17 09:57:29 lpadovan
1829 * Revision 1.14 2000/08/30 15:47:52 gerd
1830 * Implementation of pxp_document.mli rev 1.10.
1832 * Revision 1.13 2000/08/26 23:29:10 gerd
1833 * Implementations for the changed in rev 1.9 of pxp_document.mli.
1835 * Revision 1.12 2000/08/18 20:14:00 gerd
1836 * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
1837 * (T_none), (T_namespace).
1839 * Revision 1.11 2000/08/14 22:24:55 gerd
1840 * Moved the module Pxp_encoding to the netstring package under
1841 * the new name Netconversion.
1843 * Revision 1.10 2000/07/23 02:16:34 gerd
1846 * Revision 1.9 2000/07/16 19:37:09 gerd
1849 * Revision 1.8 2000/07/16 17:50:01 gerd
1852 * Revision 1.7 2000/07/16 16:34:41 gerd
1853 * New method 'write', the successor of 'write_compact_as_latin1'.
1855 * Revision 1.6 2000/07/14 13:56:11 gerd
1856 * Added methods id_attribute_name, id_attribute_value,
1857 * idref_attribute_names.
1859 * Revision 1.5 2000/07/09 17:51:14 gerd
1860 * Element nodes can store positions.
1862 * Revision 1.4 2000/07/08 23:04:06 gerd
1863 * [Merging 0.2.10:] Bugfix: allow_undeclared_attribute
1865 * Revision 1.3 2000/07/04 22:10:06 gerd
1866 * Implemented rev 1.3 of pxp_document.mli in a straight-
1869 * Revision 1.2 2000/06/14 22:19:06 gerd
1870 * Added checks such that it is impossible to mix encodings.
1872 * Revision 1.1 2000/05/29 23:48:38 gerd
1873 * Changed module names:
1874 * Markup_aux into Pxp_aux
1875 * Markup_codewriter into Pxp_codewriter
1876 * Markup_document into Pxp_document
1877 * Markup_dtd into Pxp_dtd
1878 * Markup_entity into Pxp_entity
1879 * Markup_lexer_types into Pxp_lexer_types
1880 * Markup_reader into Pxp_reader
1881 * Markup_types into Pxp_types
1882 * Markup_yacc into Pxp_yacc
1883 * See directory "compatibility" for (almost) compatible wrappers emulating
1884 * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
1886 * ======================================================================
1887 * Old logs from markup_document.ml:
1889 * Revision 1.19 2000/05/27 19:14:42 gerd
1890 * value_of_attribute: this function has been moved to
1893 * Added the following checks whether there is a violation
1894 * against the standalone declaration:
1895 * - Externally declared elements with regexp content model
1896 * must not contain extra white space
1897 * - The effect of normalization of externally declared attributes
1898 * must not depend on the type of the attributes
1899 * - Declared default values of externally declared attributes
1900 * must not have an effect on the value of the attributes.
1902 * Removed the method init_xml_standalone. It is now stored in
1903 * the DTD whether there is a standalone declaration.
1905 * Revision 1.18 2000/05/20 20:31:40 gerd
1906 * Big change: Added support for various encodings of the
1907 * internal representation.
1909 * Revision 1.17 2000/05/06 23:12:20 gerd
1910 * Allow undeclared attributes.
1912 * Revision 1.16 2000/05/01 20:42:28 gerd
1913 * New method write_compact_as_latin1.
1915 * Revision 1.15 2000/04/30 18:15:22 gerd
1916 * In function validate_content: Special handling of the pseudo
1917 * nodes "-pi" and "-vr".
1918 * Method init_root, class document: Recognizes whether the
1919 * root is virtual or real. The check on the root element name is different
1921 * New method keep_always_whitespace_mode: Turns a special mode
1922 * on in which ignorable whitespace is included into the document.
1924 * Revision 1.14 2000/03/11 22:58:15 gerd
1925 * Updated to support Markup_codewriter.
1927 * Revision 1.13 2000/01/27 21:51:56 gerd
1928 * Added method 'attributes'.
1930 * Revision 1.12 2000/01/27 21:19:34 gerd
1932 * Bugfix: 'orphaned_clone' performs now really a clone.
1934 * Revision 1.11 2000/01/20 21:57:58 gerd
1935 * Bugfix: method set_nodes does no longer add the new subnodes
1936 * in the reverse order.
1938 * Revision 1.10 1999/12/17 21:35:37 gerd
1939 * Bugfix: If the name of the root element is not specified in
1940 * the DTD, the document does not check whether the root element is a
1943 * Revision 1.9 1999/11/09 22:22:01 gerd
1944 * The "document" classes now checks that the root element is the
1945 * same as the declared root element. Thanks to Claudio Sacerdoti Coen
1946 * for his bug report.
1948 * Revision 1.8 1999/09/01 22:51:40 gerd
1949 * Added methods to store processing instructions.
1951 * Revision 1.7 1999/09/01 16:19:18 gerd
1952 * Added some warnings.
1953 * If an element type has the content model EMPTY, it is now strictly
1954 * checked that the element instance is really empty. Especially, white space
1955 * is NOT allowed in such instances.
1957 * Revision 1.6 1999/08/19 21:58:59 gerd
1958 * Added method "reset_finder". This is not very convincing, but
1959 * currently the simplest way to update the ID hash table.
1961 * Revision 1.5 1999/08/19 01:08:15 gerd
1962 * Added method "find" that searches node by ID in the whole
1964 * Bugfix: After the extension has been cloned, the "set_node" method
1965 * is invoked telling the clone to which node it is associated.
1967 * Revision 1.4 1999/08/15 13:52:52 gerd
1968 * Bugfix: WF_error "Attribute x occurs twice in element [unnamed]"
1969 * no longer possible; instead of "[unnamed]" the actual name is printed.
1970 * Improved some of the error messages.
1972 * Revision 1.3 1999/08/15 02:19:01 gerd
1973 * If the DTD allows arbitrary elements, unknown elements are not
1976 * Revision 1.2 1999/08/11 14:54:23 gerd
1977 * Optimizations: The hashtable for the 'pinstr' variable is only
1978 * created on demand. -- The 'only_whitespace' function uses a simple "for"
1979 * loop is the string is small and a lexer if the string is big.
1981 * Revision 1.1 1999/08/10 00:35:50 gerd