From: Luca Padovani
Date: Fri, 17 Nov 2000 09:57:23 +0000 (+0000)
Subject: Initial revision
X-Git-Tag: nogzip~172
X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=commitdiff_plain;h=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;p=helm.git
Initial revision
---
diff --git a/helm/DEVEL/pxp/.cvsignore b/helm/DEVEL/pxp/.cvsignore
new file mode 100644
index 000000000..c1fcbc4ae
--- /dev/null
+++ b/helm/DEVEL/pxp/.cvsignore
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/.cvsignore b/helm/DEVEL/pxp/netstring/.cvsignore
new file mode 100644
index 000000000..c1fcbc4ae
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/.cvsignore
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/LICENSE b/helm/DEVEL/pxp/netstring/LICENSE
new file mode 100644
index 000000000..820032ee2
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/LICENSE
@@ -0,0 +1,21 @@
+Copyright 1999 by Gerd Stolpmann
+
+The package "netstring" is copyright by Gerd Stolpmann.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the "netstring" software (the "Software"), to deal in the
+Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
diff --git a/helm/DEVEL/pxp/netstring/META b/helm/DEVEL/pxp/netstring/META
new file mode 100644
index 000000000..d422128ab
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/META
@@ -0,0 +1,54 @@
+version = "0.9.3"
+requires = "str"
+description = "String processing for the Internet"
+
+archive(byte) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo"
+archive(byte,toploop) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo
+ netstring_top.cmo"
+archive(byte,mt) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo
+ netstring_mt.cmo"
+archive(byte,mt,toploop) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo
+ netstring_mt.cmo netstring_top.cmo"
+archive(native) =
+ "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx"
+archive(native,mt) =
+ "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx
+ netstring_mt.cmx"
+
+archive(byte,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo"
+archive(byte,toploop,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo
+ netstring_top.cmo"
+archive(byte,mt,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo
+ netstring_mt.cmo"
+archive(byte,mt,toploop,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo
+ netstring_mt.cmo netstring_top.cmo"
+archive(native,netstring_only_iso) =
+ "netstring.cmxa netmappings_iso.cmx"
+archive(native,mt,netstring_only_iso) =
+ "netstring.cmxa netmappings_iso.cmx
+ netstring_mt.cmx"
+
+archive(byte,netstring_minimum) =
+ "netstring.cma"
+archive(byte,toploop,netstring_minimum) =
+ "netstring.cma
+ netstring_top.cmo"
+archive(byte,mt,netstring_minimum) =
+ "netstring.cma
+ netstring_mt.cmo"
+archive(byte,mt,toploop,netstring_minimum) =
+ "netstring.cma
+ netstring_mt.cmo netstring_top.cmo"
+archive(native,netstring_minimum) =
+ "netstring.cmxa"
+archive(native,mt,netstring_minimum) =
+ "netstring.cmxa
+ netstring_mt.cmx"
diff --git a/helm/DEVEL/pxp/netstring/Makefile b/helm/DEVEL/pxp/netstring/Makefile
new file mode 100644
index 000000000..98f9ef013
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/Makefile
@@ -0,0 +1,151 @@
+# make all: make bytecode archive
+# make opt: make native archive
+# make install: install bytecode archive, and if present, native archive
+# make uninstall: uninstall package
+# make clean: remove intermediate files
+# make distclean: remove any superfluous files
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS = netstring_str.cmo \
+ netencoding.cmo netbuffer.cmo netstream.cmo \
+ mimestring.cmo cgi.cmo base64.cmo \
+ nethtml_scanner.cmo nethtml.cmo \
+ neturl.cmo \
+ netmappings.cmo netconversion.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = netstring.cma
+XARCHIVE = netstring.cmxa
+
+NAME = netstring
+REQUIRES = str
+
+ISO_MAPPINGS = mappings/iso*.unimap
+OTHER_MAPPINGS = mappings/cp*.unimap \
+ mappings/adobe*.unimap \
+ mappings/jis*.unimap \
+ mappings/koi*.unimap \
+ mappings/mac*.unimap \
+ mappings/windows*.unimap
+
+all: $(ARCHIVE) \
+ netstring_top.cmo netstring_mt.cmo \
+ netmappings_iso.cmo netmappings_other.cmo
+
+opt: $(XARCHIVE) \
+ netstring_mt.cmx \
+ netmappings_iso.cmx netmappings_other.cmx
+
+
+$(ARCHIVE): $(OBJECTS)
+ $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+ $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+netmappings_iso.ml:
+ $(MAKE) -C tools
+ test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
+ -o netmappings_iso.ml $(ISO_MAPPINGS)
+
+netmappings_other.ml:
+ $(MAKE) -C tools
+ test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
+ -o netmappings_other.ml $(OTHER_MAPPINGS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlc $(DEBUG) $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt $(OPTIONS) $(ROPTIONS)
+OCAMLLEX = ocamllex
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+DEBUG =
+# Invoke with: make DEBUG=-g
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+ $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.PHONY: install
+install: all
+ { test ! -f $(XARCHIVE) || extra="*.cmxa *.a netstring_mt.cmx netmappings_iso.cmx netmappings_other.cmx netstring_mt.o netmappings_iso.o netmappings_other.o"; }; \
+ $(OCAMLFIND) install $(NAME) *.mli *.cmi *.cma netstring_top.cmo netstring_mt.cmo netmappings_iso.cmo netmappings_other.cmo META $$extra
+
+.PHONY: install-cgi
+install-cgi:
+ $(OCAMLFIND) install cgi compat-cgi/META
+
+
+.PHONY: install-base64
+install-base64:
+ $(OCAMLFIND) install base64 compat-base64/META
+
+
+.PHONY: uninstall
+uninstall:
+ $(OCAMLFIND) remove $(NAME)
+
+.PHONY: uninstall-cgi
+uninstall-cgi:
+ $(OCAMLFIND) remove cgi
+
+.PHONY: uninstall-base64
+uninstall-base64:
+ $(OCAMLFIND) remove base64
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+ test ! -d mappings || rm -f netmappings_iso.ml netmappings_other.ml
+
+.PHONY: distclean
+distclean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+ rm -f *~ depend depend.pkg compat-cgi/*~ compat-base64/*~
+ $(MAKE) -C tests distclean
+ $(MAKE) -C doc distclean
+ $(MAKE) -C tools distclean
+
+RELEASE: META
+ awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
+
+.PHONY: dist
+dist: RELEASE
+ r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*/doc/readme.dtd" --exclude="*/Mail" --exclude="*/mappings" $(NAME)
+
+.PHONY: tag-release
+tag-release: RELEASE
+ r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r $(NAME)
+
+.PHONY: release
+release: distclean
+ test -f netmappings_iso.ml
+ test -f netmappings_other.ml
+ $(MAKE) tag-release
+ $(MAKE) dist
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll
+
+.ml.cmx:
+ $(OCAMLOPT) -c -thread $<
+
+.ml.cmo:
+ $(OCAMLC) -c -thread $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ $(OCAMLLEX) $<
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/netstring/RELEASE b/helm/DEVEL/pxp/netstring/RELEASE
new file mode 100644
index 000000000..965065db5
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/RELEASE
@@ -0,0 +1 @@
+0.9.3
diff --git a/helm/DEVEL/pxp/netstring/base64.ml b/helm/DEVEL/pxp/netstring/base64.ml
new file mode 100644
index 000000000..285626f77
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/base64.ml
@@ -0,0 +1,24 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let encode s = Netencoding.Base64.encode s;;
+let url_encode s = Netencoding.Base64.url_encode s;;
+let decode s = Netencoding.Base64.decode s;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.1 2000/03/02 01:15:20 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/base64.mli b/helm/DEVEL/pxp/netstring/base64.mli
new file mode 100644
index 000000000..5dd60ea75
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/base64.mli
@@ -0,0 +1,36 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Base64 compatibility module *)
+(**********************************************************************)
+
+(* PLEASE DO NOT USE THIS MODULE IN NEW SOFTWARE!
+ * The module Netencoding.Base64 is the preferred API. This module is
+ * only for compatibility with older software.
+ *)
+
+(* This interface is compatible with all previously released Base64
+ * modules (0.1 and 0.2).
+ *)
+
+val encode : string -> string
+
+val url_encode : string -> string
+
+val decode : string -> string
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/02 01:15:20 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/cgi.ml b/helm/DEVEL/pxp/netstring/cgi.ml
new file mode 100644
index 000000000..48412be29
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/cgi.ml
@@ -0,0 +1,645 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+exception Resources_exceeded
+
+type argument_processing = Memory | File | Automatic;;
+
+type argument =
+ { mutable arg_name : string;
+ mutable arg_processing : argument_processing;
+ mutable arg_buf_value : Buffer.t;
+ mutable arg_mem_value : string option;
+ (* Here, the value is stored if it must be kept in memory *)
+ mutable arg_disk_value : string Weak.t;
+ (* This component is used iff arg_mem_value = None. The
+ * weak array has a length of 1, and the single element stores
+ * the value (if any).
+ *)
+ mutable arg_file : string option;
+ (* The filename of the temporary file storing the value *)
+ mutable arg_fd : out_channel option;
+ (* The file descriptor of the temp file (if open) *)
+ mutable arg_mimetype : string;
+ mutable arg_filename : string option;
+ mutable arg_header : (string * string) list;
+ (* For the last three components, see the description of the
+ * corresponding functions in the mli file.
+ *)
+ }
+;;
+
+type workaround =
+ Work_around_MSIE_Content_type_bug
+ | Work_around_backslash_bug
+;;
+
+type config =
+ { maximum_content_length : int;
+ how_to_process_arguments : argument -> argument_processing;
+ tmp_directory : string;
+ tmp_prefix : string;
+ workarounds : workaround list;
+ }
+;;
+
+
+let print_argument arg = (* prints a one-line summary of arg via Format.printf *)
+  Format.printf
+    "<CGIARG name=%s filename=%s mimetype=%s store=%s>" (* NOTE(review): literal was empty (ill-typed with 4 args); wording reconstructed - confirm upstream *)
+    arg.arg_name
+    (match arg.arg_filename with None -> "*" | Some n -> n) (* "*" when no filename is recorded *)
+    arg.arg_mimetype
+    (match arg.arg_file with None -> "Memory" | Some n -> n) (* temp file name, or "Memory" if none *)
+;;
+
+
+let encode = Netencoding.Url.encode ;;
+let decode = Netencoding.Url.decode ;;
+
+
+
+let url_split_re =
+ Str.regexp "[&=]";;
+
+
+let mk_url_encoded_parameters nv_pairs =
+ String.concat "&"
+ (List.map
+ (fun (name,value) ->
+ let name_encoded = Netencoding.Url.encode name in
+ let value_encoded = Netencoding.Url.encode value in
+ name_encoded ^ "=" ^ value_encoded
+ )
+ nv_pairs
+ )
+;;
+
+
+let dest_url_encoded_parameters parstr =
+
+ let rec parse_after_amp tl =
+ match tl with
+ Str.Text name :: Str.Delim "=" :: Str.Text value :: tl' ->
+ (Netencoding.Url.decode name,
+ Netencoding.Url.decode value) :: parse_next tl'
+ | Str.Text name :: Str.Delim "=" :: Str.Delim "&" :: tl' ->
+ (Netencoding.Url.decode name, "") :: parse_after_amp tl'
+ | Str.Text name :: Str.Delim "=" :: [] ->
+ [Netencoding.Url.decode name, ""]
+ | _ ->
+ failwith "Cgi.dest_url_encoded_parameters"
+ and parse_next tl =
+ match tl with
+ [] -> []
+ | Str.Delim "&" :: tl' ->
+ parse_after_amp tl'
+ | _ ->
+ failwith "Cgi.dest_url_encoded_parameters"
+ in
+ let toklist = Str.full_split url_split_re parstr in
+ match toklist with
+ [] -> []
+ | _ -> parse_after_amp toklist
+;;
+
+
+let mk_form_encoded_parameters ntv_triples =
+ failwith "Cgi.mk_form_encoded_parameters: not implemented";;
+
+
+let dest_parameter_header header options =
+ let get_name s =
+ (* s is: form-data; ... name="fieldname" ...
+ * Extract "fieldname"
+ *)
+ try
+ let tok, params = Mimestring.scan_value_with_parameters s options in
+ List.assoc "name" params
+ with
+ Not_found ->
+ failwith "Cgi.dest_form_encoded_parameters"
+ | Failure "Mimestring.scan_value_with_parameters" ->
+ failwith "Cgi.dest_form_encoded_parameters"
+ in
+
+ let get_filename s =
+ (* s is: form-data; ... filename="fieldname" ...
+ * Extract "fieldname"
+ *)
+ try
+ let tok, params = Mimestring.scan_value_with_parameters s options in
+ Some(List.assoc "filename" params)
+ with
+ Not_found ->
+ None
+ | Failure "Mimestring.scan_value_with_parameters" ->
+ failwith "Cgi.dest_form_encoded_parameters"
+ in
+
+ let mime_type =
+ try List.assoc "content-type" header
+ with Not_found -> "text/plain" in (* the default *)
+
+ let content_disposition =
+ try List.assoc "content-disposition" header
+ with
+ Not_found ->
+ failwith "Cgi.dest_form_encoded_parameters: no content-disposition"
+ in
+
+ let name = get_name content_disposition in
+ let filename = get_filename content_disposition in
+
+ name, mime_type, filename
+;;
+
+
+let dest_form_encoded_parameters parstr ~boundary config =
+ let options =
+ if List.mem Work_around_backslash_bug config.workarounds then
+ [ Mimestring.No_backslash_escaping ]
+ else
+ []
+ in
+ let parts =
+ Mimestring.scan_multipart_body_and_decode
+ parstr 0 (String.length parstr) boundary in
+ List.map
+ (fun (params, value) ->
+
+ let name, mime_type, filename = dest_parameter_header params options in
+ { arg_name = name;
+ arg_processing = Memory;
+ arg_buf_value = Buffer.create 1;
+ arg_mem_value = Some value;
+ arg_disk_value = Weak.create 1;
+ arg_file = None;
+ arg_fd = None;
+ arg_mimetype = mime_type;
+ arg_filename = filename;
+ arg_header = params;
+ }
+
+ )
+ parts
+;;
+
+
+let make_temporary_file config =
+ (* Returns (filename, out_channel). *)
+ let rec try_creation n =
+ try
+ let fn =
+ Filename.concat
+ config.tmp_directory
+ (config.tmp_prefix ^ "-" ^ (string_of_int n))
+ in
+ let fd =
+ open_out_gen
+ [ Open_wronly; Open_creat; Open_excl; Open_binary ]
+ 0o666
+ fn
+ in
+ fn, fd
+ with
+ Sys_error m ->
+ (* This does not look very intelligent, but it is the only chance
+ * to limit the number of trials.
+ *)
+ if n > 1000 then
+ failwith ("Cgi: Cannot create temporary file: " ^ m);
+ try_creation (n+1)
+ in
+ try_creation 0
+;;
+
+
+let dest_form_encoded_parameters_from_netstream s ~boundary config =
+ let parts = ref [] in
+ let options =
+ if List.mem Work_around_backslash_bug config.workarounds then
+ [ Mimestring.No_backslash_escaping ]
+ else
+ []
+ in
+
+ let create header =
+ (* CALLBACK for scan_multipart_body_from_netstream *)
+ let name, mime_type, filename = dest_parameter_header header options in
+ let p0 =
+ { arg_name = name;
+ arg_processing = Memory;
+ arg_buf_value = Buffer.create 80;
+ arg_mem_value = None;
+ arg_disk_value = Weak.create 1;
+ arg_file = None;
+ arg_fd = None;
+ arg_mimetype = mime_type;
+ arg_filename = filename;
+ arg_header = header;
+ }
+ in
+ let pr = config.how_to_process_arguments p0 in
+ let p = { p0 with arg_processing = pr } in
+ if pr = File then begin
+ let fn, fd = make_temporary_file config in
+ p.arg_file <- Some fn;
+ p.arg_fd <- Some fd;
+ p.arg_mem_value <- None;
+ end;
+ p
+ in
+
+ let add p s k n =
+ (* CALLBACK for scan_multipart_body_from_netstream *)
+ if (p.arg_processing = Automatic) &&
+ (Buffer.length (p.arg_buf_value) >= Netstream.block_size s) then begin
+ (* This is a LARGE argument *)
+ p.arg_processing <- File;
+ let fn, fd = make_temporary_file config in
+ p.arg_file <- Some fn;
+ p.arg_fd <- Some fd;
+ p.arg_mem_value <- None;
+ output_string fd (Buffer.contents p.arg_buf_value);
+ p.arg_buf_value <- Buffer.create 1;
+ end;
+
+ match p.arg_processing with
+ (Memory|Automatic) ->
+ Buffer.add_substring
+ p.arg_buf_value
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ n
+ | File ->
+ let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
+ output
+ fd
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ n;
+ in
+
+ let stop p =
+ (* CALLBACK for scan_multipart_body_from_netstream *)
+ begin match p.arg_processing with
+ (Memory|Automatic) ->
+ p.arg_mem_value <- Some (Buffer.contents p.arg_buf_value);
+ p.arg_buf_value <- Buffer.create 1;
+ | File ->
+ let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
+ close_out fd;
+ p.arg_mem_value <- None
+ end;
+ parts := p :: !parts
+ in
+
+ Mimestring.scan_multipart_body_from_netstream
+ s
+ boundary
+ create
+ add
+ stop;
+
+ List.rev !parts
+;;
+
+
+let getenv name =
+ try Sys.getenv name with Not_found -> "";;
+
+(* getenv:
+ * We use this getenv instead of Sys.getenv. The CGI specification does not
+ * say anything about what should happen if a certain environment variable
+ * is not set.
+ * Some servers initialize the environment variable to the empty string if
+ * it is not applicable, some servers do not set the variable at all. Because
+ * of this, unset variables are always reported as empty variables.
+ *
+ * This is especially a problem with QUERY_STRING.
+ *)
+
+let mk_simple_arg ~name v =
+ { arg_name = name;
+ arg_processing = Memory;
+ arg_buf_value = Buffer.create 1;
+ arg_mem_value = Some v;
+ arg_disk_value = Weak.create 0;
+ arg_file = None;
+ arg_fd = None;
+ arg_mimetype = "text/plain";
+ arg_filename = None;
+ arg_header = [];
+ }
+;;
+
+let mk_memory_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v =
+ { arg_name = name;
+ arg_processing = Memory;
+ arg_buf_value = Buffer.create 1;
+ arg_mem_value = Some v;
+ arg_disk_value = Weak.create 0;
+ arg_file = None;
+ arg_fd = None;
+ arg_mimetype = mime;
+ arg_filename = filename;
+ arg_header = header;
+ }
+;;
+
+let mk_file_arg (* builds an argument whose value is stored in the file v_filename *)
+    ~name ?(mime = "text/plain") ?filename ?(header = []) v_filename =
+  let v_abs_filename = (* relative names are made absolute against the current directory *)
+    if Filename.is_relative v_filename then
+      Filename.concat (Sys.getcwd()) v_filename
+    else
+      v_filename
+  in
+  { arg_name = name;
+    arg_processing = File;
+    arg_buf_value = Buffer.create 1;
+    arg_mem_value = None; (* no in-memory copy: arg_value must go through arg_disk_value *)
+    arg_disk_value = Weak.create 1; (* length 1, not 0: arg_value reads and writes slot 0 *)
+    arg_file = Some v_abs_filename;
+    arg_fd = None;
+    arg_mimetype = mime;
+    arg_filename = filename;
+    arg_header = header;
+  }
+;;
+
+
+let get_content_type config =
+ (* Get the environment variable CONTENT_TYPE; if necessary apply
+ * workarounds for browser bugs.
+ *)
+ let content_type = getenv "CONTENT_TYPE" in
+ let user_agent = getenv "HTTP_USER_AGENT" in
+ let eff_content_type =
+ if Str.string_match (Str.regexp ".*MSIE") user_agent 0 &&
+ List.mem Work_around_MSIE_Content_type_bug config.workarounds
+ then begin
+ (* Microsoft Internet Explorer: When used with SSL connections,
+ * this browser sometimes produces CONTENT_TYPEs like
+ * "multipart/form-data; boundary=..., multipart/form-data; boundary=..."
+ * Workaround: Throw away everything after ", ".
+ *)
+ if Str.string_match (Str.regexp "\\([^,]*boundary[^,]*\\), .*boundary")
+ content_type 0
+ then
+ Str.matched_group 1 content_type
+ else
+ content_type
+ end
+ else
+ content_type
+ in
+ eff_content_type
+;;
+
+
+let really_parse_args config =
+ let make_simple_arg (n,v) = mk_simple_arg n v in
+
+ match getenv "REQUEST_METHOD" with
+ ("GET"|"HEAD") ->
+ List.map
+ make_simple_arg
+ (dest_url_encoded_parameters(getenv "QUERY_STRING"))
+
+ | "POST" ->
+ let n =
+ try
+ int_of_string (getenv "CONTENT_LENGTH")
+ with
+ _ -> failwith "Cgi.parse_arguments"
+ in
+ if n > config.maximum_content_length then
+ raise Resources_exceeded;
+ begin
+ let mime_type, params =
+ Mimestring.scan_mime_type(get_content_type config) [] in
+ match mime_type with
+ "application/x-www-form-urlencoded" ->
+ let buf = String.create n in
+ really_input stdin buf 0 n;
+ List.map
+ make_simple_arg
+ (dest_url_encoded_parameters buf)
+ | "multipart/form-data" ->
+ let boundary =
+ try
+ List.assoc "boundary" params
+ with
+ Not_found ->
+ failwith "Cgi.parse_arguments"
+ in
+ (* -------------------------------------------------- DEBUG
+ let f = open_out "/tmp/cgiout" in
+ output_string f buf;
+ close_out f;
+ * --------------------------------------------------
+ *)
+ dest_form_encoded_parameters_from_netstream
+ (Netstream.create_from_channel stdin (Some n) 4096)
+ boundary
+ config
+ | _ ->
+ failwith ("Cgi.parse_arguments: unknown content-type " ^ mime_type)
+ end
+ | _ ->
+ failwith "Cgi.parse_arguments: unknown method"
+
+let parsed = ref None;; (* protected by lock/unlock *)
+
+let lock = ref (fun () -> ());;
+let unlock = ref (fun () -> ());;
+
+let init_mt new_lock new_unlock =
+ lock := new_lock;
+ unlock := new_unlock
+;;
+
+let protect f = (* runs f () between !lock () and !unlock (); unlock is called exactly once *)
+  !lock();
+  let r =
+    try f() (* only f is guarded, so a failing unlock below is not retried *)
+    with x ->
+      !unlock(); (* release before propagating the exception raised by f *)
+      raise x
+  in
+  !unlock();
+  r
+;;
+
+let parse_arguments config =
+ protect
+ (fun () ->
+ match !parsed with
+ Some _ -> ()
+ | None ->
+ parsed := Some (List.map
+ (fun arg -> arg.arg_name, arg)
+ (really_parse_args config))
+ )
+;;
+
+let arguments () =
+ protect
+ (fun () ->
+ match !parsed with
+ Some plist -> plist
+ | None ->
+ failwith "Cgi.arguments"
+ )
+;;
+
+let set_arguments arglist =
+ protect
+ (fun () ->
+ parsed := Some (List.map
+ (fun arg -> arg.arg_name, arg)
+ arglist)
+ )
+;;
+
+let default_config =
+ { maximum_content_length = max_int;
+ how_to_process_arguments = (fun _ -> Memory);
+ tmp_directory = "/var/tmp";
+ tmp_prefix = "cgi-";
+ workarounds = [ Work_around_MSIE_Content_type_bug;
+ Work_around_backslash_bug;
+ ]
+ }
+;;
+
+let arg_value arg =
+ match arg.arg_mem_value with
+ None ->
+ begin
+ match Weak.get arg.arg_disk_value 0 with
+ None ->
+ begin
+ match arg.arg_file with
+ None ->
+ failwith "Cgi.arg_value: no value present"
+ | Some filename ->
+ let fd = open_in_bin filename in
+ try
+ let len = in_channel_length fd in
+ let s = String.create len in
+ really_input fd s 0 len;
+ Weak.set arg.arg_disk_value 0 (Some s);
+ close_in fd;
+ s
+ with
+ any -> close_in fd; raise any
+ end
+ | Some v -> v
+ end
+ | Some s ->
+ s
+;;
+
+let arg_name arg = arg.arg_name;;
+let arg_file arg = arg.arg_file;;
+let arg_mimetype arg = arg.arg_mimetype;;
+let arg_filename arg = arg.arg_filename;;
+let arg_header arg = arg.arg_header;;
+
+let cleanup () =
+ protect
+ (fun () ->
+ match !parsed with
+ None -> ()
+ | Some plist ->
+ List.iter
+ (fun (name, arg) ->
+ match arg.arg_file with
+ None -> ()
+ | Some filename ->
+ (* We do not complain if the file does not exist anymore. *)
+ if Sys.file_exists filename then
+ Sys.remove filename;
+ arg.arg_file <- None
+ )
+ plist
+ )
+;;
+
+let argument name = List.assoc name (arguments());;
+let argument_value name = arg_value (argument name);;
+
+module Operators = struct
+ let ( !% ) = argument
+ let ( !$ ) = argument_value
+end;;
+
+
+let parse_args() =
+ parse_arguments default_config;
+ List.map
+ (fun (name, arg) -> name, arg_value arg)
+ (arguments())
+;;
+
+let parse_args_with_mimetypes() =
+ parse_arguments default_config;
+ List.map
+ (fun (name, arg) -> name, arg_mimetype arg, arg_value arg)
+ (arguments())
+;;
+
+let header s =
+ let t =
+ match s with
+ "" -> "text/html"
+ | _ -> s
+ in
+ print_string ("Content-type: " ^ t ^ "\n\n");
+ flush stdout
+;;
+
+
+let this_url() =
+ "http://" ^ (getenv "SERVER_NAME") ^ (getenv "SCRIPT_NAME")
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.7 2000/06/25 21:40:36 gerd
+ * Added printer.
+ *
+ * Revision 1.6 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.5 2000/05/16 22:29:36 gerd
+ * Added support for two common file upload bugs.
+ *
+ * Revision 1.4 2000/04/15 16:47:27 gerd
+ * Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/cgi.mli b/helm/DEVEL/pxp/netstring/cgi.mli
new file mode 100644
index 000000000..8aea499d8
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/cgi.mli
@@ -0,0 +1,419 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* FOR SIMPLE CGI PROGRAMS:
+ *
+ * If you do not need all the features of the API below, the following may
+ * be enough:
+ *
+ * - At the beginning of the main program, call 'parse_arguments' with
+ * either 'default_config' as argument or with a customized configuration.
+ * - Use 'argument_value(name)' to get the string value of the CGI parameter
+ * 'name'. If you like, you can also open the Cgi.Operators module and
+ * write '!$ name' instead. Here, !$ is a prefix operator equivalent to
+ * argument_value.
+ *
+ * If you do not change the default configuration, you do not need to
+ * worry about temporary files - there are not any.
+ *
+ * Most of the other functions defined below deal with file uploads, and
+ * are only useful for that.
+ *)
+
+
+(**********************************************************************)
+(* CGI functions *)
+(**********************************************************************)
+
+(* First, the general interface to the CGI argument parser. *)
+
+exception Resources_exceeded
+
+type argument
+
+type argument_processing =
+ Memory (* Keep the value of the argument in memory *)
+ | File (* Store the value of the argument into a temporary file *)
+ | Automatic (* Store only large arguments into files. An argument
+ * value is large if it is longer than about one block (4K).
+ * This is not an exact definition.
+ *)
+
+type workaround =
+ Work_around_MSIE_Content_type_bug
+ (* There is a bug in MSIE I observed together with SSL connections.
+ * The CONTENT_TYPE passed to the server has sometimes the wrong
+ * format. This option enables a workaround if the user agent string
+ * contains the word "MSIE".
+ *)
+ | Work_around_backslash_bug
+ (* There is a bug in many browsers: The backslash character is not
+ * handled as an escaping character in MIME headers. Because DOS-
+ * based systems use the backslash regularly in filenames, this bug
+ * matters.
+ * This option changes the interpretation of backslashes such that
+ * these are handled as normal characters. I do not know any browser
+ * that is not affected by this bug, so there is no check on
+ * the user agent string.
+ *)
+
+
+type config =
+ { maximum_content_length : int;
+ (* The maximum CONTENT_LENGTH. Bigger requests trigger a
+ * Resources_exceeded exception. This feature can be used
+ * to detect primitive denial-of-service attacks.
+ *)
+ how_to_process_arguments : argument -> argument_processing;
+ (* After the beginning of an argument has been decoded, the
+ * type of processing is decided by invoking this function on
+ * the argument. Note that the passed argument is incomplete -
+ * it does not have a value. You can assume that name, filename,
+ * MIME type and the whole header are already known.
+ * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
+ * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
+ * All other transport methods can only handle the Memory
+ * processing type.
+ *)
+ tmp_directory : string;
+ (* The temporary directory to use for the temporary files. *)
+ tmp_prefix : string;
+ (* A prefix for temporary files. It is recommended that the prefix
+ * contains a part that is random or that depends on rapidly changing
+ * environment properties. For example, the process ID is a good
+ * candidate, or the current system time. It is not required that
+ * the prefix is unique; there is a fail-safe algorithm that
+ * computes a unique file name from the prefix, even if several
+ * CGI programs run concurrently.
+ *)
+ workarounds : workaround list;
+ (* Specifies which workarounds should be enabled. *)
+ }
+
+val parse_arguments : config -> unit
+val arguments : unit -> (string * argument) list
+ (* - let () = parse_arguments config:
+ * Decodes the CGI arguments. 'config' specifies limits and processing
+ * hints; you can simply pass default_config (see below).
+ *
+ * - let arglist = arguments():
+ * The function returns a list with (name, arg) pairs. The name is
+ * passed back as string while the value is returned as opaque type
+ * 'argument'. Below accessor functions are defined. These functions
+ * require that parse_arguments was invoked before.
+ *
+ * Note 1: You can invoke 'parse_arguments' several times, but only
+ * the first time the arguments are read in. If you call the function
+ * again, it does nothing (even if the config changes). This is also
+ * true if 'parse_arguments' has been invoked after 'set_arguments'.
+ *
+ * Note 2: It is not guaranteed that stdin has been read until EOF.
+ * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
+ *
+ * Note 3: If arguments are processed in File or Automatic mode, the
+ * caller of 'parse_arguments' is responsible for deleting the files
+ * after use. You may consider using the at_exit function of the
+ * core library for this purpose. See also 'cleanup' below.
+ *)
+
+val set_arguments : argument list -> unit
+ (* Alternatively, you can set the arguments to use. This overrides any
+ * previously parsed set of arguments, and also any following parsing.
+ * - Intended for debugging, and to make it possible to replace the
+ * CGI parser by a different one while retaining this API.
+ *)
+
+val default_config : config
+ (* maximum_content_length = maxint
+ * how_to_process_arguments = "use always Memory"
+ * tmp_directory = "/var/tmp"
+ * tmp_prefix = "cgi"
+ * workarounds = [ Work_around_MSIE_Content_type_bug;
+ * Work_around_backslash_bug;
+ * ]
+ *
+ * Note 1: On some Unixes, a special file system is used for /tmp that
+ * stores the files into the virtual memory (main memory or swap area).
+ * Because of this, /var/tmp is preferred as default.
+ *
+ * Note 2: Filename.temp_file is not used because it depends on
+ * environment variables which are usually not set in a CGI environment.
+ *)
+
+val arg_name : argument -> string
+val arg_value : argument -> string
+val arg_file : argument -> string option
+val arg_mimetype : argument -> string
+val arg_filename : argument -> string option
+val arg_header : argument -> (string * string) list
+ (* The accessor functions that return several aspects of arguments.
+ * arg_name: returns the name of the argument
+ * arg_value: returns the value of the argument. If the value is stored
+ * in a temporary file, the contents of this file are returned, i.e.
+ * the file is loaded. This may have some consequences:
+ * (1) The function may fail because of I/O errors.
+ * (2) The function may be very slow, especially if the file is
+ * non-local.
+ * (3) If the value is bigger than Sys.max_string_length, the function
+ * raises the exception Resources_exceeded. On 32 bit architectures,
+ * strings are limited to 16 MB.
+ * Note that loaded values are put into weak arrays. This makes it
+ * possible that subsequent calls of 'arg_value' on the same argument
+ * can avoid loading the value again, and that unused values will
+ * nevertheless be collected by the GC.
+ * arg_file: returns 'Some filename' if the value resides in a temporary
+ * file, and 'filename' is the absolute path of this file. If the
+ * value is only available in memory, None is returned.
+ * arg_mimetype: returns the MIME type of the argument. Note that the
+ * default MIME type is "text/plain", and that the default is returned
+ * if the MIME type is not available.
+ * arg_filename: returns 'Some filename' if the argument is associated
+ * with a certain filename (e.g. from a file upload); otherwise None
+ * arg_header: returns pairs (name,value) containing the complete header
+ * of the argument. If the transmission protocol does not specify
+ * a header, the empty list is passed back.
+ *)
+
+val mk_simple_arg : name:string -> string -> argument
+ (* mk_simple_arg name value:
+ * Creates a simple argument with only name, and a value passed by string.
+ * The MIME type is "text/plain".
+ *)
+
+val mk_memory_arg
+ : name:string -> ?mime:string -> ?filename:string ->
+ ?header:((string * string) list) -> string -> argument
+ (* mk_memory_arg name mimetype filename header value:
+ * Creates an argument whose value is kept in memory.
+ *
+ * Note: The signature of this function changed in release 0.8.
+ *)
+
+val mk_file_arg
+ : name:string -> ?mime:string -> ?filename:string ->
+ ?header:((string * string) list) -> string -> argument
+ (* mk_file_arg name mimetype filename header value_filename:
+ * Creates an argument whose value is stored in the file
+ * 'value_filename'. If this file name is not absolute, it is interpreted
+ * relative to the directory returned by Sys.getcwd() - this might not
+ * be what you want with respect to mount points and symlinks (and it
+ * depends on the operating system as getcwd is only POSIX.1). The
+ * file name is turned into an absolute name immediately, and the
+ * function arg_file returns the rewritten name.
+ *
+ * Note: The signature of this function changed in release 0.8.
+ *)
+
+
+val cleanup : unit -> unit
+ (* Removes all temporary files that occur in the current set of arguments
+ * (as returned by 'arguments()').
+ *)
+
+
+(* Convenience functions: *)
+
+val argument : string -> argument
+ (* let argument name = List.assoc name (arguments()) -- i.e. returns
+ * the argument with the passed name. Of course, this function expects
+ * that 'parse_arguments' was called before.
+ *)
+
+val argument_value : string -> string
+ (* let argument_value name = arg_value(argument name) -- i.e. returns
+ * the value of the argument.
+ * See also Operators.( !$ ) below.
+ *)
+
+(* For toploop printers: *)
+
+val print_argument : argument -> unit
+
+
+(* Now, the compatibility functions. *)
+
+val parse_args : unit -> (string * string) list
+ (* Decodes the arguments of the CGI and returns them as an association list
+ * Works whatever the method is (GET or POST)
+ *)
+
+val parse_args_with_mimetypes : unit -> (string * string * string) list
+ (* Like parse_args, but returns also the MIME type.
+ * The triples contain (name, mime_type, value).
+ * If an encoding was chosen that does not transfer the MIME type,
+ * "text/plain" is returned.
+ *
+ * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED.
+ * It was included in netstring-0.4, but most people want not only
+ * the MIME type. parse_arguments should be used instead.
+ *)
+
+val header : string -> unit
+ (* Prints the content-type header.
+ * the argument is the MIME type (default value is "text/html" if the
+ * argument is the empty string)
+ *)
+
+val this_url : unit -> string
+ (* Returns the address of the CGI *)
+
+(**********************************************************************)
+(* The Operators module *)
+(**********************************************************************)
+
+(* If you open the Operators module, you can write
+ * !% "name" instead of argument "name", and
+ * !$ "name" instead of argument_value "name"
+ *)
+
+module Operators : sig
+ val ( !% ) : string -> argument
+ (* same as 'argument' above *)
+ val ( !$ ) : string -> string
+ (* same as 'argument_value' above *)
+end
+
+(**********************************************************************)
+(* Low-level functions *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * See also the Netencoding module. This interface contains these functions
+ * to keep the compatibility with the old Cgi module.
+ *)
+
+val decode : string -> string
+val encode : string -> string
+
+(* URL-encoded parameters:
+ *
+ * The following two functions create and analyze URL-encoded parameters.
+ * Format: name1=val1&name2=val2&...
+ *)
+
+val mk_url_encoded_parameters : (string * string) list -> string
+ (* The argument is a list of (name,value) pairs. The result is the
+ * single URL-encoded parameter string.
+ *)
+
+val dest_url_encoded_parameters : string -> (string * string) list
+ (* The argument is the URL-encoded parameter string. The result is
+ * the corresponding list of (name,value) pairs.
+ * Note: Whitespace within the parameter string is ignored.
+ * If there is a format error, the function fails.
+ *)
+
+(* Form-encoded parameters:
+ *
+ * According to: RFCs 2388, 2183, 2045, 2046
+ *
+ * General note: This is a simple API to encode/decode form-encoded parameters.
+ * Especially, it is not possible to pass the header of the parts through
+ * this API.
+ *)
+
+val mk_form_encoded_parameters : (string * string * string) list ->
+ (string * string)
+ (* The argument is a list of (name,mimetype,value) triples.
+ * The result is (parstr, boundary), where 'parstr' is the
+ * single form-encoded parameter string, and 'boundary' is the
+ * boundary to separate the message parts.
+ *
+ * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
+ *)
+
+val dest_form_encoded_parameters : string -> boundary:string -> config ->
+ argument list
+ (* The first argument is the form-encoded parameter string.
+ * The second argument is the boundary (extracted from the mime type).
+ * Third argument: Only the workarounds component is used.
+ * The result is
+ * the corresponding list of arguments (all in memory).
+ * If there is a format error, the function fails.
+ * Note: embedded multipart/mixed types are returned as they are,
+ * and are not recursively decoded.
+ * Note: The content-transfer-encodings "7bit", "8bit", "binary",
+ * "base64", and "quoted-printable" are supported.
+ * Note: Parameter names which include spaces or non-alphanumeric
+ * characters may be problematic (the rules of RFC 2047 are NOT applied).
+ * Note: The returned MIME type is not normalized.
+ *)
+
+val dest_form_encoded_parameters_from_netstream
+ : Netstream.t -> boundary:string -> config -> argument list
+ (* let arglist = dest_form_encoded_parameters_from_netstream s b c:
+ * Reads the form-encoded parameters from netstream s. The boundary
+ * is passed in b, and the configuration in c.
+ * A list of arguments is returned.
+ *
+ * See also dest_form_encoded_parameters.
+ *
+ * Restriction: In contrast to dest_form_encoded_parameters, this
+ * function is not able to handle the content-transfer-encodings
+ * "base64" and "quoted-printable". (This is not really a restriction
+ * because no browser uses these encodings in conjunction with HTTP.
+ * This is different if mail transport is chosen. - The reason for
+ * this restriction is that there are currently no stream functions
+ * for decoding.)
+ *)
+
+(* Private functions: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+
+
+(**********************************************************************)
+(* Compatibility with CGI library by J.-C. Filliatre *)
+(**********************************************************************)
+
+(* The following functions are compatible with J.-C. Filliatre's CGI
+ * library:
+ *
+ * parse_args, header, this_url, decode, encode.
+ *
+ * Note that the new implementation of parse_args can be safely invoked
+ * several times.
+ *
+ * Since release 0.8, Netstring's CGI implementation is again thread-safe.
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.7 2000/06/25 21:40:36 gerd
+ * Added printer.
+ *
+ * Revision 1.6 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.5 2000/05/16 22:28:13 gerd
+ * New "workarounds" config component.
+ *
+ * Revision 1.4 2000/04/15 16:47:27 gerd
+ * Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/compat-base64/META b/helm/DEVEL/pxp/netstring/compat-base64/META
new file mode 100644
index 000000000..a5c003ea4
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/compat-base64/META
@@ -0,0 +1,3 @@
+version = "0.5"
+requires = "netstring"
+description = "Compatibility with base64"
diff --git a/helm/DEVEL/pxp/netstring/compat-cgi/META b/helm/DEVEL/pxp/netstring/compat-cgi/META
new file mode 100644
index 000000000..2294921a0
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/compat-cgi/META
@@ -0,0 +1,3 @@
+version = "0.5"
+requires = "netstring"
+description = "Compatibility with cgi"
diff --git a/helm/DEVEL/pxp/netstring/depend b/helm/DEVEL/pxp/netstring/depend
new file mode 100644
index 000000000..5991264c6
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/depend
@@ -0,0 +1,36 @@
+base64.cmo: netencoding.cmi base64.cmi
+base64.cmx: netencoding.cmx base64.cmi
+cgi.cmo: mimestring.cmi netbuffer.cmi netencoding.cmi netstream.cmi cgi.cmi
+cgi.cmx: mimestring.cmx netbuffer.cmx netencoding.cmx netstream.cmx cgi.cmi
+mimestring.cmo: netbuffer.cmi netencoding.cmi netstream.cmi netstring_str.cmi \
+ mimestring.cmi
+mimestring.cmx: netbuffer.cmx netencoding.cmx netstream.cmx netstring_str.cmx \
+ mimestring.cmi
+netbuffer.cmo: netbuffer.cmi
+netbuffer.cmx: netbuffer.cmi
+netconversion.cmo: netmappings.cmi netconversion.cmi
+netconversion.cmx: netmappings.cmx netconversion.cmi
+netencoding.cmo: netstring_str.cmi netencoding.cmi
+netencoding.cmx: netstring_str.cmx netencoding.cmi
+nethtml.cmo: nethtml.cmi
+nethtml.cmx: nethtml.cmi
+netmappings.cmo: netmappings.cmi
+netmappings.cmx: netmappings.cmi
+netmappings_iso.cmo: netmappings.cmi
+netmappings_iso.cmx: netmappings.cmx
+netmappings_other.cmo: netmappings.cmi
+netmappings_other.cmx: netmappings.cmx
+netstream.cmo: netbuffer.cmi netstream.cmi
+netstream.cmx: netbuffer.cmx netstream.cmi
+netstring_mt.cmo: cgi.cmi netmappings.cmi netstring_str.cmi netstring_mt.cmi
+netstring_mt.cmx: cgi.cmx netmappings.cmx netstring_str.cmx netstring_mt.cmi
+netstring_str.cmo: netstring_str.cmi
+netstring_str.cmx: netstring_str.cmi
+netstring_top.cmo: netstring_top.cmi
+netstring_top.cmx: netstring_top.cmi
+neturl.cmo: netencoding.cmi neturl.cmi
+neturl.cmx: netencoding.cmx neturl.cmi
+cgi.cmi: netstream.cmi
+mimestring.cmi: netstream.cmi
+netmappings.cmi: netconversion.cmi
+netstream.cmi: netbuffer.cmi
diff --git a/helm/DEVEL/pxp/netstring/depend.pkg b/helm/DEVEL/pxp/netstring/depend.pkg
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB
new file mode 100644
index 000000000..d942e2786
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB
@@ -0,0 +1,52 @@
+******************************************************************************
+ABOUT-FINDLIB - Package manager for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+The findlib library provides a scheme to manage reusable software components
+(packages), and includes tools that support this scheme. Packages are
+collections of OCaml modules for which metainformation can be stored. The
+packages are kept in the filesystem hierarchy, but with strict directory
+structure. The library contains functions to look up the directory that stores
+a package, to query metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows the user
+to enter queries on the command-line. In order to simplify compilation and
+linkage, there are new frontends of the various OCaml compilers that can
+directly deal with packages.
+
+Metainformation is stored together with the packages. This includes a version
+string, the archives the package consists of, and additional linker options.
+Packages can also be dependent on other packages. There is a query which finds
+out all predecessors of a list of packages and sorts them topologically. The
+new compiler frontends do this implicitly.
+
+Metainformation can be conditional, i.e. depend on a set of predicates. This is
+mainly used to be able to react to certain properties of the environment, such
+as if the bytecode or the native compiler is invoked, if the application is
+multi-threaded, and a few more. If the new compiler frontends are used, most
+predicates are found out automatically.
+
+There is special support for scripts. A new directive, "#require", loads
+packages into scripts. Of course, this works only with newly created toploops
+which include the findlib library.
+
+==============================================================================
+Where to get findlib
+==============================================================================
+
+The manual of findlib is available online [1]. You can download findlib here
+[2].
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml
new file mode 100644
index 000000000..d1dc5b04e
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml
@@ -0,0 +1,61 @@
+
+
+%common;
+
+findlib">
+Findlib">
+
+]>
+
+
+
+ Abstract
+
+The &f; library provides a scheme to manage reusable software
+components (packages), and includes tools that support this
+scheme. Packages are collections of OCaml modules for which
+metainformation can be stored. The packages are kept in the filesystem
+hierarchy, but with strict directory structure. The library contains
+functions to look up the directory that stores a package, to query
+metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows
+the user to enter queries on the command-line. In order to simplify
+compilation and linkage, there are new frontends of the various OCaml
+compilers that can directly deal with packages.
+
+
+
+Metainformation is stored together with the packages. This includes a
+version string, the archives the package consists of, and additional
+linker options. Packages can also be dependent on other
+packages. There is a query which finds out all predecessors of a list
+of packages and sorts them topologically. The new compiler frontends
+do this implicitly.
+
+
+
+Metainformation can be conditional, i.e. depend on a set of
+predicates. This is mainly used to be able to react to certain
+properties of the environment, such as if the bytecode or the native
+compiler is invoked, if the application is multi-threaded, and a few
+more. If the new compiler frontends are used, most predicates are
+found out automatically.
+
+
+
+There is special support for scripts. A new directive, "#require",
+loads packages into scripts. Of course, this works only with newly
+created toploops which include the &f; library.
+
+
+
+
+ Where to get findlib
+
+The manual of &f; is available online .
+You can download &f; here .
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/INSTALL b/helm/DEVEL/pxp/netstring/doc/INSTALL
new file mode 100644
index 000000000..cca39944b
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/INSTALL
@@ -0,0 +1,128 @@
+******************************************************************************
+INSTALL - Netstring, string processing functions for the net
+******************************************************************************
+
+
+==============================================================================
+The "Netstring" package
+==============================================================================
+
+------------------------------------------------------------------------------
+Prerequisites
+------------------------------------------------------------------------------
+
+Netstring does not need any other packages besides the O'Caml core. Netstring
+needs at least O'Caml 3.00. The installation procedure defined in the Makefile
+requires findlib [1] to work [2].
+
+------------------------------------------------------------------------------
+Configuration
+------------------------------------------------------------------------------
+
+It is not necessary to configure "Netstring".
+
+------------------------------------------------------------------------------
+Compilation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make all
+ compiles with the bytecode compiler and creates netstring.cma,
+ netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
+ netmappings_other.cmo
+
+- make opt
+ compiles with the native compiler and creates netstring.cmxa,
+ netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx
+
+The archive netstring.cmx?a contains the functionality, and the two
+single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
+configurations to the character set conversion module. These configurations are
+optional:
+
+- Netmappings_iso: Conversion tables for the character sets ISO-8859-2, -3,
+ -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.
+
+- Netmappings_other: Conversion tables for the character sets WINDOWS-1250,
+ -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258; code pages 037, 424,
+ 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861, 862, 863, 864, 865,
+ 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh Roman encoding;
+ Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats Encodings.
+
+Even without these configuration files, the conversion module is able to handle
+the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant of
+UTF-8.
+
+The module Netstring_mt must be linked into multi-threaded applications;
+otherwise some mutexes remain uninitialized.
+
+The module Netstring_top loads several printers for abstract values (for
+toploops).
+
+------------------------------------------------------------------------------
+Installation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make install
+ installs the bytecode archive, the interface definitions, and if present,
+ the native archive in the default location of findlib
+
+- make install-cgi
+ Installs a pseudo package "cgi" which is compatible with the old cgi
+ package. This has the effect that software searching the "cgi" package will
+ find the netstring package instead. This is recommended.
+
+- make install-base64
+ Installs a pseudo package "base64" which is compatible with the old base64
+ package. This has the effect that software searching the "base64" package
+ will find the netstring package instead. This is recommended.
+
+- make uninstall
+ removes the package
+
+- make uninstall-cgi
+ removes the "cgi" compatibility package
+
+- make uninstall-base64
+ removes the "base64" compatibility package
+
+------------------------------------------------------------------------------
+Linking netstring with findlib
+------------------------------------------------------------------------------
+
+The command
+
+ocamlfind ocamlc ... -package netstring ... -linkpkg ...
+
+links as much code as possible from netstring into your application: All
+conversion tables; when -thread is specified, the initialization code for
+multi-threaded programs; when a toploop is created, the code setting the value
+printers.
+
+The following predicates reduce the amount of linked code:
+
+- netstring_only_iso: Only the conversion tables for the ISO-8859 series of
+ character sets are linked.
+
+- netstring_minimum: No additional conversion tables are linked; only
+ ISO-8859-1 and the UTF encodings work.
+
+For example, the command may look like
+
+ocamlfind ocamlc ...
+ -package netstring -predicates netstring_only_iso ... -linkpkg ...
+
+to link only the ISO-8859 conversion tables.
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2] Findlib is a package manager, see the file ABOUT-FINDLIB.
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/INSTALL.xml b/helm/DEVEL/pxp/netstring/doc/INSTALL.xml
new file mode 100644
index 000000000..b5b53eddc
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/INSTALL.xml
@@ -0,0 +1,153 @@
+
+
+%common;
+
+Netstring">
+
+]>
+
+
+ The "Netstring" package
+ Prerequisites
+
+&m; does not need any other packages besides the O'Caml core. &m; needs
+at least O'Caml 3.00. The installation procedure defined in the Makefile
+requires findlib to
+workFindlib is a package manager, see the file
+ABOUT-FINDLIB. .
+
+
+
+ Configuration
+
+It is not necessary to configure "Netstring".
+
+
+
+ Compilation
+
+The Makefile defines the following goals:
+
+
+
+ make all
+ compiles with the bytecode compiler and creates netstring.cma,
+netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
+netmappings_other.cmo
+
+
+ make opt
+ compiles with the native compiler and creates netstring.cmxa,
+netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx
+
+
+
+ The archive netstring.cmx?a contains the functionality, and the two
+single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
+configurations to the character set conversion module. These configurations are
+optional:
+
+
+ Netmappings_iso: Conversion tables for the character sets
+ISO-8859-2, -3, -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.
+
+ Netmappings_other: Conversion tables for the character sets
+WINDOWS-1250, -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258;
+code pages 037, 424, 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861,
+862, 863, 864, 865, 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh
+Roman encoding; Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats
+Encodings.
+
+
+
+Even without these configuration files, the conversion module is able to
+handle the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant
+of UTF-8.
+
+The module Netstring_mt must be linked into multi-threaded applications;
+otherwise some mutexes remain uninitialized.
+
+The module Netstring_top loads several printers for abstract values (for
+toploops).
+
+
+
+ Installation
+
+The Makefile defines the following goals:
+
+
+ make install
+ installs the bytecode archive, the interface definitions, and if
+present, the native archive in the default location of findlib
+
+
+
+
+ make install-cgi
+ Installs a pseudo package "cgi" which is compatible with the old
+cgi package. This has the effect that software searching the "cgi" package will
+find the netstring package instead. This is recommended.
+
+
+
+ make install-base64
+ Installs a pseudo package "base64"
+which is compatible with the old base64 package. This has the effect that
+software searching the "base64" package will find the netstring package
+instead. This is recommended.
+
+
+
+ make uninstall
+ removes the package
+
+
+
+ make uninstall-cgi
+ removes the "cgi" compatibility package
+
+
+
+ make uninstall-base64
+ removes the "base64" compatibility package
+
+
+
+
+
+
+ Linking netstring with findlib
+ The command
+
+ocamlfind ocamlc ... -package netstring ... -linkpkg ...
+
+links as much code as possible from netstring into your application: All
+conversion tables; when -thread is specified, the initialization code for
+multi-threaded programs; when a toploop is created, the code setting the value
+printers.
+
+The following predicates reduce the amount of linked code:
+
+
+ netstring_only_iso: Only the conversion tables for the ISO-8859
+series of character sets are linked.
+
+ netstring_minimum: No additional conversion tables are linked;
+only ISO-8859-1 and the UTF encodings work.
+
+
+
+For example, the command may look like
+
+
+ocamlfind ocamlc ...
+ -package netstring -predicates netstring_only_iso ... -linkpkg ...
+
+
+to link only the ISO-8859 conversion tables.
+
+
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/netstring/doc/Makefile b/helm/DEVEL/pxp/netstring/doc/Makefile
new file mode 100644
index 000000000..7f8450be3
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/Makefile
@@ -0,0 +1,22 @@
+# Default goal: generate the plain-text documents README, INSTALL and
+# ABOUT-FINDLIB.
+.PHONY: all
+all: README INSTALL ABOUT-FINDLIB
+
+# Each document is produced by running the "readme" tool with -text on the
+# XML file of the same name and redirecting its output; it is rebuilt when
+# that XML file, common.xml or config.xml changes.
+README: README.xml common.xml config.xml
+ readme -text README.xml >README
+
+INSTALL: INSTALL.xml common.xml config.xml
+ readme -text INSTALL.xml >INSTALL
+
+ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
+ readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
+
+# "clean" has no recipe, so it removes nothing.
+.PHONY: clean
+clean:
+
+# "CLEAN" only depends on "clean".
+.PHONY: CLEAN
+CLEAN: clean
+
+# "distclean" runs "clean" and then deletes all files matching *~.
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+
diff --git a/helm/DEVEL/pxp/netstring/doc/README b/helm/DEVEL/pxp/netstring/doc/README
new file mode 100644
index 000000000..b590416be
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/README
@@ -0,0 +1,212 @@
+******************************************************************************
+README - Netstring, string processing functions for the net
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+Netstring is a collection of string processing functions that are useful in
+conjunction with Internet messages and protocols. In particular, it contains
+functions for the following purposes:
+
+- Parsing MIME messages
+
+- Several encoding/decoding functions (Base 64, Quoted Printable, Q,
+ URL-encoding)
+
+- A new implementation of the CGI interface that allows users to upload files
+
+- A simple HTML parser
+
+- URL parsing, printing and processing
+
+- Conversion between character sets
+
+==============================================================================
+Download
+==============================================================================
+
+You can download Netstring as gzip'ed tarball [1].
+
+==============================================================================
+Documentation
+==============================================================================
+
+Sorry, there is no manual. The mli files describe each function in detail.
+Furthermore, the following additional information may be useful.
+
+------------------------------------------------------------------------------
+New CGI implementation
+------------------------------------------------------------------------------
+
+For a long time, the CGI implementation by Jean-Christophe Filliatre has been
+the only freely available module that implemented the CGI interface (it is also
+based on code by Daniel de Rauglaudre). It worked well, but it did not support
+file uploads because this requires a parser for MIME messages.
+
+The main goal of Netstring is to realize such uploads, and because of this it
+contains an almost complete parser for MIME messages.
+
+The new CGI implementation provides the same functions as the old one, and
+some extensions. If you call Cgi.parse_args(), you get the CGI parameters as
+before, but as already explained this works also if the parameters are
+encapsulated as MIME message. In the HTML code, you can select the MIME format
+by using
+
+<form action="..." method="post" enctype="multipart/form-data">
+...
+</form>
+
+- this "enctype" attribute forces the browser to send the form parameters as
+multipart MIME message (Note: You can neither send the parameters of a
+conventional hyperlink as MIME message nor the form parameters if the "method"
+is "get"). In many browsers only this particular encoding enables the file
+upload elements, you cannot perform file uploads with other encodings.
+
+As MIME messages can transport MIME types, filename, and other additional
+properties, it is also possible to get these using the enhanced interface.
+After calling
+
+Cgi.parse_arguments config
+
+you can get all available information about a certain parameter by invoking
+
+let param = Cgi.argument "name"
+
+- where "param" has the type "argument". There are several accessor functions
+to extract the various aspects of arguments (name, filename, value by string,
+value by temporary file, MIME type, MIME header) from "argument" values.
+
+------------------------------------------------------------------------------
+Base64, and other encodings
+------------------------------------------------------------------------------
+
+Netstring is also the successor of the Base64 package. It provides a Base64
+compatible interface, and an enhanced API. The latter is contained in the
+Netencoding module which also offers implementations of the "quoted printable",
+"Q", and "URL" encodings. Please see netencoding.mli for details.
+
+------------------------------------------------------------------------------
+The MIME scanner functions
+------------------------------------------------------------------------------
+
+In the Mimestring module you can find several functions scanning parts of MIME
+messages. These functions already cover most aspects of MIME messages: Scanning
+of headers, analysis of structured header entries, and scanning of multipart
+bodies. Of course, a full-featured MIME scanner would require some more
+functions, especially concrete parsers for frequent structures (mail addresses
+or date strings).
+
+Please see the file mimestring.mli for details.
+
+------------------------------------------------------------------------------
+The HTML parser
+------------------------------------------------------------------------------
+
+The HTML parser should be able to read every HTML file, whether it is correct
+or not. The parser tries to recover from parsing errors as much as possible.
+
+The parser returns the HTML term as conventional recursive value (i.e. no
+object-oriented design).
+
+The parser depends a bit on knowledge about the HTML version; mainly because it
+needs to know the tags that are always empty. You may have to adjust this
+configuration before the parser works well enough for your purpose.
+
+Please see the Nethtml module for details.
+
+------------------------------------------------------------------------------
+The abstract data type URL
+------------------------------------------------------------------------------
+
+The module Neturl contains support for URL parsing and processing. The
+implementation follows strictly the standards RFC 1738 and RFC 1808. URLs can
+be parsed, and several accessor functions allow the user to get components of
+parsed URLs, or to change components. Modifying URLs is safe; it is impossible
+to create a URL that does not have a valid string representation.
+
+Both absolute and relative URLs are supported. It is possible to apply a
+relative URL to a base URL in order to get the corresponding absolute URL.
+
+------------------------------------------------------------------------------
+Conversion between character sets and encodings
+------------------------------------------------------------------------------
+
+The module Netconversion converts strings from one character set to another.
+It is Unicode-based, and there are conversion tables for more than 50
+encodings.
+
+==============================================================================
+Author, Copying
+==============================================================================
+
+Netstring has been written by Gerd Stolpmann [2]. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+
+==============================================================================
+History
+==============================================================================
+
+- Changed in 0.9.3: Fixed a bug in the "install" rule of the Makefile.
+
+- Changed in 0.9.2: New format for the conversion tables which are now much
+ smaller.
+
+- Changed in 0.9.1: Updated the Makefile such that (native-code) compilation
+ of netmappings.ml becomes possible.
+
+- Changed in 0.9: Extended Mimestring module: It can now process RFC-2047
+ messages.
+ New Netconversion module which converts strings between character encodings.
+
+- Changed in 0.8.1: Added the component url_accepts_8bits to
+ Neturl.url_syntax. This helps processing URLs which intentionally contain
+ bytes >= 0x80.
+ Fixed a bug: Every URL containing a 'j' was malformed!
+
+- Changed in 0.8: Added the module Neturl which provides the abstract data
+ types of URLs.
+ The whole package is now thread-safe.
+ Added printers for the various opaque data types.
+ Added labels to function arguments where appropriate. The following
+ functions changed their signatures significantly: Cgi.mk_memory_arg,
+ Cgi.mk_file_arg.
+
+- Changed in 0.7: Added workarounds for frequent browser bugs. Some functions
+ take now an additional argument specifying which workarounds are enabled.
+
+- Changed in 0.6.1: Updated URLs in documentation.
+
+- Changed in 0.6: The file upload has been re-implemented to support large
+ files; the file is now read block by block and the blocks can be collected
+ either in memory or in a temporary file.
+ Furthermore, the CGI API has been revised. There is now an opaque data type
+ "argument" that hides all implementation details and that is extensible (if
+ necessary, it is possible to add features without breaking the interface
+ again).
+ The CGI argument parser can be configured; currently it is possible to limit
+ the size of uploaded data, to control by which method arguments are
+ processed, and to set up where temporary files are created.
+ The other parts of the package that have nothing to do with CGI remain
+ unchanged.
+
+- Changed in 0.5.1: A mistake in the documentation has been corrected.
+
+- Initial version 0.5: The Netstring package wants to be the successor of the
+ Base64-0.2 and the Cgi-0.3 packages. The sum of both numbers is 0.5, and
+ because of this, the first version number is 0.5.
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/netstring-0.9.2.tar.gz
+
+[2] see mailto:gerd@gerd-stolpmann.de
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/README.xml b/helm/DEVEL/pxp/netstring/doc/README.xml
new file mode 100644
index 000000000..bbf473e99
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/doc/README.xml
@@ -0,0 +1,244 @@
+
+
+%common;
+
+
+up'>
+
+
+%config;
+
+]>
+
+
+
+ Abstract
+
+Netstring is a collection of string processing functions that are
+useful in conjunction with Internet messages and protocols. In particular,
+it contains functions for the following purposes:
+
+
+ Parsing MIME messages
+
+ Several encoding/decoding functions (Base 64, Quoted Printable, Q, URL-encoding)
+
+ A new implementation of the CGI interface that allows users to upload files
+
+ A simple HTML parser
+
+ URL parsing, printing and processing
+
+ Conversion between character sets
+
+
+
+
+
+
+ Download
+
+You can download Netstring as gzip'ed tarball .
+
+
+
+
+
+ Documentation
+
+Sorry, there is no manual. The mli files describe each function in
+detail. Furthermore, the following additional information may be useful.
+
+
+ New CGI implementation
+
+ For a long time, the CGI implementation by Jean-Christophe Filliatre
+has been the only freely available module that implemented the CGI interface
+(it is also based on code by Daniel de Rauglaudre). It worked well, but it did not
+support file uploads because this requires a parser for MIME messages.
+ The main goal of Netstring is to realize such uploads, and because of
+this it contains an almost complete parser for MIME messages.
+ The new CGI implementation provides the same functions as the old
+one, and some extensions. If you call Cgi.parse_args(), you get the CGI
+parameters as before, but as already explained this works also if the
+parameters are encapsulated as MIME message. In the HTML code, you can select
+the MIME format by using
+
+...
+
+]]>
+
+- this "enctype" attribute forces the browser to send the form parameters
+as multipart MIME message (Note: You can neither send the parameters of a
+conventional hyperlink as MIME message nor the form parameters if the
+"method" is "get"). In many browsers only this particular encoding enables
+the file upload elements, you cannot perform file uploads with other encodings.
+
+
+ As MIME messages can transport MIME types, filename, and other
+additional properties, it is also possible to get these using the enhanced
+interface. After calling
+
+you can get all available information about a certain parameter by invoking
+
+- where "param" has the type "argument". There are several accessor functions
+to extract the various aspects of arguments (name, filename, value by string,
+value by temporary file, MIME type, MIME header) from "argument" values.
+
+
+
+
+
+
+ Base64, and other encodings
+
+ Netstring is also the successor of the Base64 package. It provides a
+Base64 compatible interface, and an enhanced API. The latter is contained in
+the Netencoding module which also offers implementations of the "quoted
+printable", "Q", and "URL" encodings. Please see netencoding.mli for
+details.
+
+
+
+
+
+ The MIME scanner functions
+
+ In the Mimestring module you can find several functions scanning parts
+of MIME messages. These functions already cover most aspects of MIME messages:
+Scanning of headers, analysis of structured header entries, and scanning of
+multipart bodies. Of course, a full-featured MIME scanner would require some
+more functions, especially concrete parsers for frequent structures
+(mail addresses or date strings).
+
+ Please see the file mimestring.mli for details.
+
+
+
+
+ The HTML parser
+
+ The HTML parser should be able to read every HTML file; whether it is
+correct or not. The parser tries to recover from parsing errors as much as
+possible.
+
+ The parser returns the HTML term as conventional recursive value
+(i.e. no object-oriented design).
+ The parser depends a bit on knowledge about the HTML version; mainly
+because it needs to know the tags that are always empty. You may have to
+adjust this configuration before the parser works well enough for
+your purpose.
+
+ Please see the Nethtml module for details.
+
+
+
+ The abstract data type URL
+ The module Neturl contains support for URL parsing and processing.
+The implementation follows strictly the standards RFC 1738 and RFC 1808.
+URLs can be parsed, and several accessor functions allow the user to
+get components of parsed URLs, or to change components. Modifying URLs
+is safe; it is impossible to create a URL that does not have a valid
+string representation.
+
+ Both absolute and relative URLs are supported. It is possible to
+apply a relative URL to a base URL in order to get the corresponding
+absolute URL.
+
+
+
+ Conversion between character sets and encodings
+ The module Netconversion converts strings from one character set
+to another. It is Unicode-based, and there are conversion tables for more than
+50 encodings.
+
+
+
+
+
+ Author, Copying
+
+Netstring has been written by &person.gps;. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+
+
+
+
+ History
+
+
+ Changed in 0.9.3: Fixed a bug in the "install" rule of
+the Makefile.
+
+ Changed in 0.9.2: New format for the conversion tables
+which are now much smaller.
+
+ Changed in 0.9.1: Updated the Makefile such that
+(native-code) compilation of netmappings.ml becomes possible.
+
+
+ Changed in 0.9: Extended Mimestring module: It can
+now process RFC-2047 messages.
+ New Netconversion module which converts strings between character
+encodings.
+
+ Changed in 0.8.1: Added the component
+url_accepts_8bits to Neturl.url_syntax. This helps processing URLs which
+intentionally contain bytes >= 0x80.
+ Fixed a bug: Every URL containing a 'j' was malformed!
+
+ Changed in 0.8: Added the module Neturl which
+provides the abstract data types of URLs.
+ The whole package is now thread-safe.
+ Added printers for the various opaque data types.
+ Added labels to function arguments where appropriate. The
+following functions changed their signatures significantly:
+Cgi.mk_memory_arg, Cgi.mk_file_arg.
+
+ Changed in 0.7: Added workarounds for frequent
+browser bugs. Some functions take now an additional argument
+specifying which workarounds are enabled.
+
+ Changed in 0.6.1: Updated URLs in documentation.
+
+
+ Changed in 0.6: The file upload has been re-implemented
+to support large files; the file is now read block by block and the blocks can
+be collected either in memory or in a temporary file.
+Furthermore, the CGI API has been revised. There is now an opaque data type
+"argument" that hides all implementation details and that is extensible (if
+necessary, it is possible to add features without breaking the interface
+again).
+The CGI argument parser can be configured; currently it is possible to
+limit the size of uploaded data, to control by which method arguments are
+processed, and to set up where temporary files are created.
+The other parts of the package that have nothing to do with CGI remain
+unchanged.
+
+
+
+ Changed in 0.5.1: A mistake in the documentation has
+been corrected.
+
+
+ Initial version 0.5:
+The Netstring package wants to be the successor of the Base64-0.2 and
+the Cgi-0.3 packages. The sum of both numbers is 0.5, and because of this,
+the first version number is 0.5.
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/mimestring.ml b/helm/DEVEL/pxp/netstring/mimestring.ml
new file mode 100644
index 000000000..8fc4bfcbe
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/mimestring.ml
@@ -0,0 +1,1035 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Str = Netstring_str;;
+
+(* Regular expressions used by the header scanner. *)
+
+(* A single CR (code 013) or LF character. *)
+let cr_or_lf_re = Str.regexp "[\013\n]";;
+
+(* A non-empty run of spaces and tabs followed by the $ anchor. *)
+let trim_right_spaces_re =
+ Str.regexp "[ \t]+$";;
+
+(* A non-empty run of spaces and tabs preceded by the ^ anchor. *)
+let trim_left_spaces_re =
+ Str.regexp "^[ \t]+";;
+
+(* One header field. Group 1 is the field name (no space, tab, CR, LF or
+ * colon). After the colon, group 2 is the rest of the line including its
+ * linefeed, plus all directly following lines that begin with a space or
+ * a tab (continuation lines).
+ *)
+let header_re =
+ Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
+
+(* An empty line: an optional CR followed by LF. *)
+let empty_line_re =
+ Str.regexp "\013?\n";;
+
+(* An LF directly followed by an empty line (optional CR, then LF). *)
+let end_of_header_re =
+ Str.regexp "\n\013?\n";;
+
+
+(* [scan_header ?unfold parstr ~start_pos ~end_pos] parses the header fields
+ * of [parstr] beginning at [start_pos]. Fields are matched one after another
+ * with [header_re]; at the first position where no field matches, an empty
+ * line ([empty_line_re]) is required.
+ *
+ * Returns [(fields, pos)]: [fields] are the (name, value) pairs in order of
+ * appearance with lowercased names, [pos] is the position after the empty
+ * line.
+ *
+ * If [unfold] is true (the default), all CR and LF characters are deleted
+ * from the value, and then the matches of [trim_right_spaces_re] and
+ * [trim_left_spaces_re] are deleted. Otherwise the value is the unmodified
+ * text of group 2 of [header_re].
+ *
+ * Fails with "Mimestring.scan_header" if a field ends after [end_pos], or if
+ * the terminating empty line is missing.
+ * NOTE(review): the end of the empty line is not compared with [end_pos].
+ *)
+let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
+ let rec parse_header i l =
+ (* i: current position; l: fields found so far, in reverse order *)
+ match Str.string_partial_match header_re parstr i with
+ Some r ->
+ let i' = Str.match_end r in
+ if i' > i1 then
+ failwith "Mimestring.scan_header";
+ let name = String.lowercase(Str.matched_group r 1 parstr) in
+ let value_with_crlf =
+ Str.matched_group r 2 parstr in
+ let value =
+ if unfold then begin
+ let value_with_rspaces =
+ Str.global_replace cr_or_lf_re "" value_with_crlf in
+ let value_with_lspaces =
+ Str.global_replace trim_right_spaces_re "" value_with_rspaces in
+ Str.global_replace trim_left_spaces_re "" value_with_lspaces
+ end
+ else value_with_crlf
+ in
+ parse_header i' ( (name,value) :: l)
+ | None ->
+ (* The header must end with an empty line *)
+ begin match Str.string_partial_match empty_line_re parstr i with
+ Some r' ->
+ List.rev l, Str.match_end r'
+ | None ->
+ failwith "Mimestring.scan_header"
+ end
+ in
+ parse_header i0 []
+;;
+
+(* The tokens produced by the scanner:
+ * - Atom: a run of characters that are not special, control, whitespace
+ * or the start of a quoted string, comment or domain literal
+ * - EncodedWord: the triple (charset, encoding, content) of an atom that
+ * matched the encoded-word syntax; the scanner stores charset and
+ * encoding in uppercase
+ * - QString: the contents of a quoted string
+ * - Control: a character with code 0 to 31 or 127 to 255 that is not
+ * special
+ * - Special: a character listed as special (also used for a comment when
+ * the space character is special and comments are not returned)
+ * - DomainLiteral: the contents of a bracketed domain literal
+ * - Comment: a parenthesized comment (its text is not kept)
+ * - End: the end of the scanned string has been reached
+ *)
+type s_token =
+ Atom of string
+ | EncodedWord of (string * string * string)
+ | QString of string
+ | Control of char
+ | Special of char
+ | DomainLiteral of string
+ | Comment
+ | End
+;;
+
+(* Scanner options:
+ * - No_backslash_escaping: the backslash is an ordinary character inside
+ * quoted strings, domain literals and comments
+ * - Return_comments: comments are returned as Comment tokens
+ * - Recognize_encoded_words: atoms matching the encoded-word syntax are
+ * returned as EncodedWord tokens
+ *)
+type s_option =
+ No_backslash_escaping
+ | Return_comments
+ | Recognize_encoded_words
+;;
+
+(* A token together with its location: [token_pos] is the scanner position
+ * at the beginning of the token, [token_line] the line counter at that
+ * point, [token_len] the length recorded by the scanner.
+ *)
+type s_extended_token =
+ { token : s_token;
+ token_pos : int;
+ token_line : int;
+ token_linepos : int; (* Position of the beginning of the line *)
+ token_len : int;
+ mutable token_sep : bool; (* separates adjacent encoded words *)
+ }
+;;
+
+(* Accessors for the components of an extended token. The column is the
+ * distance of the token position from the beginning of its line.
+ *)
+let get_token { token = tok } = tok;;
+let get_pos { token_pos = pos } = pos;;
+let get_line { token_line = line } = line;;
+let get_column { token_pos = pos; token_linepos = line_start } =
+  pos - line_start;;
+let get_length { token_len = len } = len;;
+let separates_adjacent_encoded_words { token_sep = sep } = sep;;
+
+(* [get_decoded_word et] returns the text denoted by the token of [et]:
+ * the string of atoms, quoted strings and domain literals; the character
+ * of control and special tokens as a one-character string; the empty
+ * string for comments; and the decoded content of encoded words, where
+ * the encoding Q is decoded with Netencoding.Q and the encoding B with
+ * Netencoding.Base64.
+ * Fails with "get_decoded_word" for other encodings and for End.
+ *)
+let get_decoded_word et =
+  match et.token with
+    (Atom text | QString text | DomainLiteral text) ->
+      text
+  | (Control ch | Special ch) ->
+      String.make 1 ch
+  | Comment ->
+      ""
+  | EncodedWord (_, ("Q"|"q"), payload) ->
+      Netencoding.Q.decode payload
+  | EncodedWord (_, ("B"|"b"), payload) ->
+      Netencoding.Base64.decode
+        ~url_variant:false
+        ~accept_spaces:false
+        payload
+  | EncodedWord (_, _, _) ->
+      failwith "get_decoded_word"
+  | End ->
+      failwith "get_decoded_word"
+;;
+
+(* [get_charset et] returns the charset component of an encoded word, and
+ * "US-ASCII" for every other token except End, for which it fails with
+ * "get_charset".
+ *)
+let get_charset et =
+  match et.token with
+    End ->
+      failwith "get_charset"
+  | EncodedWord (cs, _, _) ->
+      cs
+  | (Atom _ | QString _ | Control _ | Special _ | DomainLiteral _ | Comment) ->
+      "US-ASCII"
+;;
+
+(* The configuration of a scanner. The [opt_*] fields cache the membership
+ * of the corresponding option in [scanner_options]; [is_special] is a table
+ * indexed by character code that is true for the characters of
+ * [scanner_specials]; [space_is_special] caches the table entry for the
+ * space character (code 32).
+ *)
+type scanner_spec =
+ { (* What the user specifies: *)
+ scanner_specials : char list;
+ scanner_options : s_option list;
+ (* Derived from that: *)
+ mutable opt_no_backslash_escaping : bool;
+ mutable opt_return_comments : bool;
+ mutable opt_recognize_encoded_words : bool;
+
+ mutable is_special : bool array;
+ mutable space_is_special : bool;
+ }
+;;
+
+(* The string being scanned together with the mutable scanning state. *)
+type scanner_target =
+ { scanned_string : string;
+ mutable scanner_pos : int;
+ mutable scanner_line : int;
+ mutable scanner_linepos : int;
+ (* Position of the beginning of the line *)
+ mutable scanned_tokens : s_extended_token Queue.t;
+ (* A queue of already scanned tokens in order to look ahead *)
+ mutable last_token : s_token;
+ (* The last returned token. It is only important whether it is
+ * EncodedWord or not.
+ *)
+ }
+;;
+
+(* A scanner is the pair of its configuration and its state. *)
+type mime_scanner = scanner_spec * scanner_target
+;;
+
+(* Returns the current position of the scanner. Fails with
+ * "get_pos_of_scanner" if the scanner recognizes encoded words.
+ *)
+let get_pos_of_scanner (spec, target) =
+  match spec.opt_recognize_encoded_words with
+    true -> failwith "get_pos_of_scanner"
+  | false -> target.scanner_pos
+;;
+
+(* Returns the current line counter of the scanner. Fails with
+ * "get_line_of_scanner" if the scanner recognizes encoded words.
+ *)
+let get_line_of_scanner (spec, target) =
+  match spec.opt_recognize_encoded_words with
+    true -> failwith "get_line_of_scanner"
+  | false -> target.scanner_line
+;;
+
+(* Returns the current column of the scanner, i.e. the distance of the
+ * current position from the beginning of the current line. Fails with
+ * "get_column_of_scanner" if the scanner recognizes encoded words.
+ *)
+let get_column_of_scanner (spec, target) =
+  match spec.opt_recognize_encoded_words with
+    true -> failwith "get_column_of_scanner"
+  | false -> target.scanner_pos - target.scanner_linepos
+;;
+
+(* [create_mime_scanner ~specials ~scan_options ?pos ?line ?column s]
+ * creates a scanner for the string [s]. The specification (table of
+ * special characters, option flags) is computed as soon as [specials] and
+ * [scan_options] are passed; the returned function then only creates the
+ * scanning state.
+ * [pos] (default 0) is the position where scanning starts, [line]
+ * (default 1) the initial line counter, and [column] (default 0) the
+ * column of [pos], from which the beginning of the line is derived.
+ *)
+let create_mime_scanner ~specials ~scan_options =
+  let special_table = Array.create 256 false in
+  List.iter
+    (fun ch -> special_table.( Char.code ch ) <- true)
+    specials;
+  let enabled opt = List.mem opt scan_options in
+  let spec =
+    { scanner_specials = specials;
+      scanner_options = scan_options;
+      opt_no_backslash_escaping = enabled No_backslash_escaping;
+      opt_return_comments = enabled Return_comments;
+      opt_recognize_encoded_words = enabled Recognize_encoded_words;
+      is_special = special_table;
+      space_is_special = special_table.( Char.code ' ' );
+    }
+  in
+  (* Grab the remaining arguments: *)
+  fun ?(pos=0) ?(line=1) ?(column=0) s ->
+    ( spec,
+      { scanned_string = s;
+        scanner_pos = pos;
+        scanner_line = line;
+        scanner_linepos = pos - column;
+        scanned_tokens = Queue.create();
+        last_token = Comment;  (* Must not be initialized with EncodedWord *)
+      }
+    )
+;;
+
+
+(* The syntax of an encoded word: an equals sign and a question mark, then
+ * three non-empty groups that contain no question mark and are separated by
+ * question marks, then a question mark and an equals sign. The scanner
+ * reads the groups as charset, encoding and content.
+ *)
+let encoded_word_re =
+ Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
+
+(* [scan_next_token scn] scans one token of the target string, beginning at
+ * the current scanner position. It updates position, line counter and
+ * beginning-of-line position of the target, and returns the pair
+ * (extended token, token). When the end of the string is reached, the
+ * token is End. The look-ahead queue of the target is not used here.
+ *
+ * Whitespace (space, tab, CR, LF) is skipped unless the character is
+ * special. Comments are returned as Comment if the scanner returns
+ * comments, else as Special with a space character if the space is
+ * special, else they are skipped.
+ *
+ * Unterminated quoted strings, domain literals and comments extend to the
+ * end of the string.
+ *)
+let scan_next_token ((spec,target) as scn) =
+  let mk_pair t len =
+    { token = t;
+      token_pos = target.scanner_pos;
+      token_line = target.scanner_line;
+      token_linepos = target.scanner_linepos;
+      token_len = len;
+      token_sep = false;
+    },
+    t
+  in
+
+  (* Note: mk_pair creates a new token pair, and it assumes that
+   * target.scanner_pos (and also scanner_line and scanner_linepos)
+   * still contain the position of the beginning of the token.
+   *)
+
+  let s = target.scanned_string in
+  let l = String.length s in
+  let rec scan i =
+    (* Invariant: i = target.scanner_pos *)
+    if i < l then begin
+      let c = s.[i] in
+      if spec.is_special.( Char.code c ) then begin
+        let pair = mk_pair (Special c) 1 in
+        target.scanner_pos <- target.scanner_pos + 1;
+        (match c with
+           '\n' ->
+             target.scanner_line <- target.scanner_line + 1;
+             target.scanner_linepos <- target.scanner_pos
+         | _ -> ()
+        );
+        pair
+      end
+      else
+        match c with
+          '"' ->
+            (* Quoted string: *)
+            scan_qstring (i+1) (i+1) 0
+        | '(' ->
+            (* Comment: *)
+            let i', line, linepos =
+              scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
+            in
+            let advance() =
+              target.scanner_pos <- i';
+              target.scanner_line <- line;
+              target.scanner_linepos <- linepos
+            in
+            if spec.opt_return_comments then begin
+              let pair = mk_pair Comment (i' - i) in
+              advance();
+              pair
+            end
+            else
+              if spec.space_is_special then begin
+                let pair = mk_pair (Special ' ') (i' - i) in
+                advance();
+                pair
+              end
+              else begin
+                advance();
+                scan i'
+              end
+        | (' '|'\t'|'\r') ->
+            (* Ignore whitespace by default: *)
+            target.scanner_pos <- target.scanner_pos + 1;
+            scan (i+1)
+        | '\n' ->
+            (* Ignore whitespace by default: *)
+            target.scanner_pos <- target.scanner_pos + 1;
+            target.scanner_line <- target.scanner_line + 1;
+            target.scanner_linepos <- target.scanner_pos;
+            scan (i+1)
+        | ('\000'..'\031'|'\127'..'\255') ->
+            let pair = mk_pair (Control c) 1 in
+            target.scanner_pos <- target.scanner_pos + 1;
+            pair
+        | '[' ->
+            (* Domain literal: *)
+            scan_dliteral (i+1) (i+1) 0
+        | _ ->
+            scan_atom i i
+    end
+    else
+      mk_pair End 0
+
+  and scan_atom i0 i =
+    (* i0: beginning of the atom; i: current position *)
+    let return_atom() =
+      let astring = String.sub s i0 (i-i0) in
+      let r =
+        if spec.opt_recognize_encoded_words then
+          Str.string_match ~groups:4 encoded_word_re astring 0
+        else
+          None
+      in
+      match r with
+        None ->
+          (* An atom never contains a linefeed character, so we can ignore
+           * scanner_line here.
+           *)
+          let pair = mk_pair (Atom astring) (i-i0) in
+          target.scanner_pos <- i;
+          pair
+      | Some mr ->
+          (* Found an encoded word. *)
+          let charset  = Str.matched_group mr 1 astring in
+          let encoding = Str.matched_group mr 2 astring in
+          let content  = Str.matched_group mr 3 astring in
+          let t = EncodedWord(String.uppercase charset,
+                              String.uppercase encoding,
+                              content) in
+          let pair = mk_pair t (i-i0) in
+          target.scanner_pos <- i;
+          pair
+    in
+
+    if i < l then
+      let c = s.[i] in
+      match c with
+        ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
+          return_atom()
+      | _ ->
+          if spec.is_special.( Char.code c ) then
+            return_atom()
+          else
+            scan_atom i0 (i+1)
+    else
+      return_atom()
+
+  and scan_qstring i0 i n =
+    (* i0: position after the opening quote; i: current position;
+     * n: number of characters of the contents found so far (an escaped
+     * character counts once)
+     *)
+    if i < l then
+      let c = s.[i] in
+      match c with
+        '"' ->
+          (* Regular end of the quoted string: *)
+          let content, line, linepos = copy_qstring i0 (i-1) n in
+          let pair = mk_pair (QString content) (i-i0+2) in
+          target.scanner_pos <- i+1;
+          target.scanner_line <- line;
+          target.scanner_linepos <- linepos;
+          pair
+      | '\\' when not spec.opt_no_backslash_escaping ->
+          scan_qstring i0 (i+2) (n+1)
+      | _ ->
+          scan_qstring i0 (i+1) (n+1)
+    else
+      (* Missing right double quote *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (QString content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+  and copy_qstring i0 i1 n =
+    (* Used for quoted strings and for domain literals.
+     * Copies the characters i0 to i1 into a new string of length n and
+     * removes the escaping backslashes. Returns the string, and the line
+     * counter and beginning-of-line position after the copied text.
+     *)
+    let r = String.create n in
+    let k = ref 0 in
+    let line = ref target.scanner_line in
+    let linepos = ref target.scanner_linepos in
+    (* esc: whether the previous character was an escaping backslash. The
+     * character after such a backslash is always copied, even if it is a
+     * backslash itself. Without this flag both backslashes of an escaped
+     * backslash would be dropped, and the assertion below would fail.
+     *)
+    let esc = ref false in
+    for i = i0 to i1 do
+      let c = s.[i] in
+      match c with
+        '\\' when i < i1 && not spec.opt_no_backslash_escaping && not !esc ->
+          esc := true
+      | '\n' ->
+          line := !line + 1;
+          linepos := i+1;
+          r.[ !k ] <- c;
+          incr k;
+          esc := false
+      | _ ->
+          r.[ !k ] <- c;
+          incr k;
+          esc := false
+    done;
+    assert (!k = n);
+    r, !line, !linepos
+
+  and scan_dliteral i0 i n =
+    (* Same arguments as scan_qstring, for domain literals *)
+    if i < l then
+      let c = s.[i] in
+      match c with
+        ']' ->
+          (* Regular end of the domain literal: *)
+          let content, line, linepos = copy_qstring i0 (i-1) n in
+          let pair = mk_pair (DomainLiteral content) (i-i0+2) in
+          target.scanner_pos <- i+1;
+          target.scanner_line <- line;
+          target.scanner_linepos <- linepos;
+          pair
+      | '\\' when not spec.opt_no_backslash_escaping ->
+          scan_dliteral i0 (i+2) (n+1)
+      | _ ->
+          (* Note: '[' is not allowed by RFC 822; we treat it here as
+           * a regular character (questionable)
+           *)
+          scan_dliteral i0 (i+1) (n+1)
+    else
+      (* Missing right bracket *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (DomainLiteral content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+
+  and scan_comment i level line linepos =
+    (* i: current position inside the comment; line, linepos: line counter
+     * and beginning-of-line position at i. Returns the position after the
+     * comment together with the updated line and linepos.
+     *)
+    if i < l then
+      let c = s.[i] in
+      match c with
+        ')' ->
+          (i+1), line, linepos
+      | '(' ->
+          (* nested comment *)
+          let i', line', linepos' =
+            scan_comment (i+1) (level+1) line linepos
+          in
+          scan_comment i' level line' linepos'
+      | '\\' when not spec.opt_no_backslash_escaping ->
+          if (i+1) < l && s.[i+1] = '\n' then
+            scan_comment (i+2) level (line+1) (i+2)
+          else
+            scan_comment (i+2) level line linepos
+      | '\n' ->
+          scan_comment (i+1) level (line+1) (i+1)
+      | _ ->
+          scan_comment (i+1) level line linepos
+    else
+      (* Missing closing ')'. After a backslash at the very end of the
+       * string i is l+1; never report a position beyond the end.
+       *)
+      (min i l), line, linepos
+  in
+
+  scan target.scanner_pos
+;;
+
+
+(* [scan_token scn] returns the next token of the scanner as the pair
+ * (extended token, token), and records the token in target.last_token.
+ * Tokens waiting in the look-ahead queue are returned first. Otherwise the
+ * token is scanned with scan_next_token; only after an EncodedWord the
+ * following whitespace tokens are scanned in advance (see below).
+ *)
+let scan_token ((spec,target) as scn) =
+ (* This function handles token queueing in order to recognize white space
+ * that separates adjacent encoded words.
+ *)
+
+ let rec collect_whitespace () =
+ (* Scans whitespace tokens and returns them as:
+ * (ws_list, other_tok) if there is some other_tok following the
+ * list (other_tok = End is possible)
+ *)
+ let (et, t) as pair = scan_next_token scn in
+ ( match t with
+ (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
+ let ws_list, tok = collect_whitespace() in
+ pair :: ws_list, tok
+ | _ ->
+ [], pair
+ )
+ in
+
+ try
+ (* Is there an already scanned token in the queue? *)
+ let et = Queue.take target.scanned_tokens in
+ let t = et.token in
+ target.last_token <- t;
+ et, et.token
+ with
+ Queue.Empty ->
+ (* If not: inspect the last token. If that token is an EncodedWord,
+ * the next tokens are scanned in advance to determine if there
+ * are spaces separating two EncodedWords. These tokens are put
+ * into the queue such that it is avoided that they are scanned
+ * twice. (The sole purpose of the queue.)
+ *)
+ match target.last_token with
+ EncodedWord(_,_,_) as ew ->
+ let ws_list, tok = collect_whitespace() in
+ (* If tok is an EncodedWord, too, the tokens in ws_list must
+ * be flagged as separating two adjacent encoded words.
+ *)
+ ( match tok with
+ _, EncodedWord(_,_,_) ->
+ List.iter
+ (fun (et,t) ->
+ et.token_sep <- true)
+ ws_list
+ | _ ->
+ ()
+ );
+ (* Anyway, queue the read tokens but the first up *)
+ ( match ws_list with
+ [] ->
+ (* Nothing to queue *)
+ let et, t = tok in
+ target.last_token <- t;
+ tok
+ | (et,t) as pair :: ws_list' ->
+ List.iter
+ (fun (et',_) ->
+ Queue.add et' target.scanned_tokens)
+ ws_list';
+ (* An End token is not queued; it is scanned again when the
+ * queue has become empty.
+ *)
+ ( match tok with
+ | _, End ->
+ ()
+ | (et',_) ->
+ Queue.add et' target.scanned_tokens
+ );
+ (* Return the first scanned token *)
+ target.last_token <- t;
+ pair
+ )
+ | _ ->
+ (* Regular case: Scan one token; do not queue it up *)
+ let (et, t) as pair = scan_next_token scn in
+ target.last_token <- t;
+ pair
+;;
+
+
+(* [scan_token_list scn] scans all remaining tokens of [scn] and returns
+ * them as (extended token, token) pairs in order of appearance. The End
+ * token is not included.
+ *)
+let scan_token_list scn =
+  let rec gather acc =
+    match scan_token scn with
+      _, End ->
+        List.rev acc
+    | pair ->
+        gather (pair :: acc)
+  in
+  gather []
+;;
+
+
+(* [scan_structured_value s specials options] creates a scanner for [s]
+ * with the given special characters and options, and returns all tokens
+ * up to, but not including, End, in order of appearance.
+ *)
+let scan_structured_value s specials options =
+  let scn = create_mime_scanner specials options s in
+  let rec gather acc =
+    match scan_token scn with
+      _, End ->
+        List.rev acc
+    | _, tok ->
+        gather (tok :: acc)
+  in
+  gather []
+;;
+
+
+(* Predefined lists of special characters, usable as the specials argument
+ * of the scanner functions. The two lists differ only in their last
+ * element: the dot for specials_rfc822, the slash for specials_rfc2045.
+ *)
+let specials_rfc822 =
+ [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
+
+
+let specials_rfc2045 =
+ [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
+
+
+(* [scan_encoded_text_value s] scans [s] with encoded words being
+ * recognized. Whitespace characters and the characters that would begin a
+ * comment, a domain literal or a quoted string are declared special, so
+ * they are returned as Special tokens. The result is the list of extended
+ * tokens up to, but not including, End; tokens flagged as separating two
+ * adjacent encoded words are left out.
+ *
+ * NOTE(review): a character with code 0 to 31 or 127 to 255 that is not in
+ * the specials list is scanned as a Control token, which reaches the
+ * final "assert false" case. Confirm whether such input can occur.
+ *)
+let scan_encoded_text_value s =
+ let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
+ let options = [ Recognize_encoded_words ] in
+ let scn = create_mime_scanner specials options s in
+
+ let rec collect () =
+ match scan_token scn with
+ _, End ->
+ []
+ | et, _ when separates_adjacent_encoded_words et ->
+ collect()
+ | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
+ et :: collect ()
+ | _, _ ->
+ assert false
+ in
+ collect()
+;;
+
+
+(* [scan_value_with_parameters s options] parses a value of the form
+ * main; name1=value1; name2=value2 ...
+ * and returns the pair (main, [(name1, value1); (name2, value2); ...]).
+ * The main value and the parameter values may be atoms or quoted strings;
+ * parameter names must be atoms. The string is scanned with the special
+ * characters semicolon, equals sign and comma.
+ *
+ * Fails with "Mimestring.scan_value_with_parameters" if the tokens do not
+ * have this form; this includes an empty parameter after a semicolon.
+ *)
+let scan_value_with_parameters s options =
+ (* parse_params expects a parameter at the head of the token list;
+ * parse_rest expects the end of the list or a semicolon.
+ *)
+ let rec parse_params tl =
+ match tl with
+ Atom n :: Special '=' :: Atom v :: tl' ->
+ (n,v) :: parse_rest tl'
+ | Atom n :: Special '=' :: QString v :: tl' ->
+ (n,v) :: parse_rest tl'
+ | _ ->
+ failwith "Mimestring.scan_value_with_parameters"
+ and parse_rest tl =
+ match tl with
+ [] -> []
+ | Special ';' :: tl' ->
+ parse_params tl'
+ | _ ->
+ failwith "Mimestring.scan_value_with_parameters"
+ in
+
+ (* Note: Even if not used here, the comma is a very common separator
+ * and should be recognized as being special. You will get a
+ * failure if there is a comma in the scanned string.
+ *)
+ let tl = scan_structured_value s [ ';'; '='; ',' ] options in
+ match tl with
+ [ Atom n ] -> n, []
+ | [ QString n ] -> n, []
+ | Atom n :: Special ';' :: tl' ->
+ n, parse_params tl'
+ | QString n :: Special ';' :: tl' ->
+ n, parse_params tl'
+ | _ ->
+ failwith "Mimestring.scan_value_with_parameters"
+;;
+
+
+ (* As scan_value_with_parameters, but lowercases the type and param names. *)
+ let scan_mime_type s options =
+   let (ty, ps) = scan_value_with_parameters s options in
+   (String.lowercase ty, List.map (fun (p, v) -> (String.lowercase p, v)) ps)
+;;
+
+
+ (* Matches a single LF; used below to find the end of a boundary line. *)
+ let lf_re = Str.regexp "[\n]";;
+ (* let parts = scan_multipart_body s i0 i1 boundary:
+  *
+  * Splits the multipart body found in s between the positions i0 and i1
+  * into its parts. 'boundary' is the boundary string without the leading
+  * "--". For every part the pair (header, value) is returned, where
+  * header is the result of scan_header for the part, and value is the
+  * text between the header and the line break preceding the next
+  * boundary line. Everything after the last boundary line is ignored.
+  *
+  * Raises Invalid_argument if i0 or i1 is not a valid position of s.
+  *)
+ let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
+   let l_s = String.length s in
+   if i0 < 0 || i1 < 0 || i0 > l_s || i1 > l_s then
+     invalid_arg "Mimestring.scan_multipart_body";
+
+   (* First compile the regexps scanning for 'boundary': *)
+   let boundary1_re =
+     Str.regexp ("\n--" ^ Str.quote boundary) in
+   let boundary2_re =
+     Str.regexp ("--" ^ Str.quote boundary) in
+
+   let rec parse i =
+     (* i: Beginning of the current part (position directly after the
+      * boundary line)
+      *)
+     (* Search for next boundary at position i *)
+     let i' =
+       try min (fst (Str.search_forward boundary1_re s i) + 1) i1
+       with
+         Not_found -> i1
+     in
+     (* i': Either the position of the first '-' of the boundary line,
+      * or i1 if no boundary has been found
+      *)
+     if i' >= i1 then
+       []                (* Ignore everything after the last boundary *)
+     else
+       let i'' =
+         try min (fst (Str.search_forward lf_re s i') + 1) i1
+         with
+           Not_found -> i1
+       in
+       (* i'': The position after the boundary line *)
+       let header, k = scan_header s i i' in
+       (* header: the header of the part
+        * k: beginning of the body
+        *)
+       (* We know that i'-1 is a linefeed character. i'-2 should be a CR
+        * character. Both characters are not part of the value.
+        *)
+       let value_end =
+         if i' >= 2 && s.[i'-2] = '\013' then i'-2 else i'-1 in
+       (* value_end may lie before k, e.g. if the boundary line directly
+        * follows the blank line ending the header. The value is empty in
+        * this case; String.sub must not be called with a negative length
+        * (it would raise Invalid_argument).
+        *)
+       let value = String.sub s k (max 0 (value_end - k)) in
+       let pair = (header, value) in
+
+       if i'' >= i1 then
+         [ pair ]
+       else
+         pair :: parse i''
+   in
+
+   (* Find the first boundary. This is a special case, because it may be
+    * right at the beginning of the string (no preceding CRLF)
+    *)
+   let i_bnd =
+     if Str.string_partial_match boundary2_re s i0 <> None then
+       i0
+     else
+       try min (fst (Str.search_forward boundary1_re s i0)) i1
+       with
+         Not_found -> i1
+   in
+
+   if i_bnd >= i1 then
+     []
+   else
+     let i_bnd' =
+       try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
+       with
+         Not_found -> i1
+     in
+     if i_bnd' >= i1 then
+       []
+     else
+       parse i_bnd'
+;;
+
+
+ (* Like scan_multipart_body, but the value of every part is decoded
+  * according to the "content-transfer-encoding" field of the part header
+  * ("7bit" if the field is missing). Fails on unknown encodings. *)
+ let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
+   let parts = scan_multipart_body s i0 i1 boundary in
+   List.map
+     (fun (params, value) ->
+        let encoding =
+          (* The names of the encodings are case-insensitive (RFC 2045),
+           * but only the names of the header fields arrive in lowercase. *)
+          try String.lowercase (List.assoc "content-transfer-encoding" params)
+          with Not_found -> "7bit" in
+        (* NOTE: In the case of "base64" and "quoted-printable", the
+         * allocation of the string "value" could be avoided. *)
+        let value' =
+          match encoding with
+            ("7bit"|"8bit"|"binary") -> value
+          | "base64" ->
+              Netencoding.Base64.decode_substring
+                value 0 (String.length value) false true
+          | "quoted-printable" ->
+              Netencoding.QuotedPrintable.decode_substring
+                value 0 (String.length value)
+          | _ ->
+              failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
+        in
+        (params, value'))
+     parts
+;;
+
+
+ (* Scans the multipart body read from the netstream s, part by part.
+  * For every part:
+  * - "create header" is called with the scanned header of the part; the
+  *   result p represents the part in the following calls
+  * - "add p s 0 n" is called for every chunk of the part value; the chunk
+  *   is the region of length n at position 0 of the current window of s
+  * - "stop p" is called when the end of the part has been reached, or when
+  *   an exception is raised while searching the end of the part
+  * Raises Invalid_argument if the block size of s is too small for the
+  * boundary, and Failure if the stream ends in the middle of a part.
+  *)
+ let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
+   (* The block size of s must be at least the length of the boundary + 3.
+    * Otherwise it is not guaranteed that the boundary is always recognized.
+    *)
+   if Netstream.block_size s < String.length boundary + 3 then
+     invalid_arg "Mimestring.scan_multipart_body_from_netstream";
+
+   (* First compile the regexps scanning for 'boundary': *)
+   let boundary1_re =
+     Str.regexp ("\n--" ^ Str.quote boundary) in
+   let boundary2_re =
+     Str.regexp ("--" ^ Str.quote boundary) in
+
+   (* Subtask 1: Search the end of the MIME header: CR LF CR LF
+    * (or LF LF). Enlarge the window until the complete header
+    * is covered by the window.
+    *)
+   let rec search_end_of_header k =
+     (* Search the end of the header beginning at position k of the
+      * current window.
+      * Return the position of the first character of the body.
+      *)
+     try
+       (* Search for LF CR? LF: *)
+       let _, r = Str.search_forward
+                    end_of_header_re
+                    (Netbuffer.unsafe_buffer (Netstream.window s))
+                    k
+       in
+       (* If match_end <= window_length, the search was successful.
+        * Otherwise, we searched in the uninitialized region of the
+        * buffer.
+        *)
+       if Str.match_end r <= Netstream.window_length s then
+         Str.match_end r
+       else
+         raise Not_found
+     with
+       Not_found ->
+         (* If the end of the stream is reached, the end of the header
+          * is missing: Error.
+          * Otherwise, we try to read another block, and continue.
+          *)
+         if Netstream.at_eos s then
+           failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
+         let w0 = Netstream.window_length s in
+         Netstream.want_another_block s;
+         search_end_of_header (max (w0 - 2) 0)
+   in
+
+   (* Subtask 2: Search the first boundary line. *)
+   let rec search_first_boundary() =
+     (* Search boundary per regexp; return the position of the character
+      * immediately following the boundary (on the same line), or
+      * raise Not_found.
+      *)
+     try
+       (* Search boundary per regexp: *)
+       let _, r = Str.search_forward
+                    boundary1_re
+                    (Netbuffer.unsafe_buffer (Netstream.window s))
+                    0
+       in
+       (* A match extending beyond window_length lies in the uninitialized
+        * region of the buffer and does not count.
+        *)
+       if Str.match_end r <= Netstream.window_length s then begin
+         Str.match_end r
+       end
+       else raise Not_found
+     with
+       Not_found ->
+         if Netstream.at_eos s then raise Not_found;
+         (* The regexp did not match: Move the window by one block. *)
+         let n =
+           min
+             (Netstream.window_length s)
+             (Netstream.block_size s)
+         in
+         Netstream.move s n;
+         search_first_boundary()
+   in
+
+   (* Subtask 3: Search the next boundary line. Invoke 'add' for every
+    * read chunk
+    *)
+   let rec search_next_boundary p =
+     (* Returns the position directly after the boundary on the same line *)
+     try
+       (* Search boundary per regexp: *)
+       let i,r = Str.search_forward
+                   boundary1_re
+                   (Netbuffer.unsafe_buffer (Netstream.window s))
+                   0
+       in
+       (* A match extending beyond window_length lies in the uninitialized
+        * region of the buffer and does not count.
+        *)
+       if Str.match_end r <= Netstream.window_length s then begin
+         (* Add the last chunk of the part. *)
+         let n =
+           (* i is a LF. i - 1 should be CR. Ignore these characters. *)
+           if i >= 1 then
+             match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
+               '\013' -> i - 1
+             | _ -> i
+           else
+             i
+         in
+         add p s 0 n;
+         Str.match_end r
+       end
+       else raise Not_found
+     with
+       Not_found ->
+         if Netstream.at_eos s then
+           failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
+         (* The regexp did not match: Add the first block of the window;
+          * and move the window.
+          *)
+         let n =
+           min
+             (Netstream.window_length s)
+             (Netstream.block_size s)
+         in
+         add p s 0 n;
+         Netstream.move s n;
+         search_next_boundary p
+   in
+
+   (* Subtask 4: Search the end of the boundary line *)
+   let rec search_end_of_line k =
+     (* Search LF beginning at position k. Discard any contents until that. *)
+     try
+       let _,r = Str.search_forward
+                   lf_re
+                   (Netbuffer.unsafe_buffer (Netstream.window s))
+                   k
+       in
+       (* A match extending beyond window_length lies in the uninitialized
+        * region of the buffer and does not count.
+        *)
+       if Str.match_end r <= Netstream.window_length s then begin
+         Str.match_end r
+       end
+       else raise Not_found
+     with
+       Not_found ->
+         if Netstream.at_eos s then
+           failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
+         (* The regexp did not match: move the window. *)
+         let n = Netstream.window_length s in
+         Netstream.move s n;
+         search_end_of_line 0
+   in
+
+   (* Subtask 5: Check whether "--" follows the boundary on the same line *)
+   let check_whether_last_boundary k =
+     (* k: The position directly after the boundary. *)
+     Netstream.want s (k+2);
+     let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+     (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
+   in
+
+   (* Subtask 6: Check whether the buffer begins with a boundary. *)
+   let check_beginning_is_boundary () =
+     let m = String.length boundary + 2 in
+     Netstream.want s m;
+     let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+     (Netstream.window_length s >= m) &&
+     (Str.string_partial_match boundary2_re str 0 <> None)
+   in
+
+   let rec parse_part () =
+     (* The first byte of the current window of s contains the character
+      * directly following the boundary line that starts this part.
+      *)
+     (* Search the end of the MIME header: *)
+     let k_eoh = search_end_of_header 0 in
+     (* Get the MIME header: *)
+     let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+     let header, k_eoh' = scan_header str 0 k_eoh in
+     assert (k_eoh = k_eoh');
+     (* Move the window over the header: *)
+     Netstream.move s k_eoh;
+     (* Create the part: *)
+     let p = create header in
+     let continue =
+       begin try
+         (* Search the next boundary; add the chunks while searching: *)
+         let k_eob = search_next_boundary p in
+         (* Is this the last boundary? *)
+         if check_whether_last_boundary k_eob then begin
+           (* Skip the rest: *)
+           while not (Netstream.at_eos s) do
+             Netstream.move s (Netstream.window_length s)
+           done;
+           Netstream.move s (Netstream.window_length s);
+           false
+         end
+         else begin
+           (* Move to the beginning of the next line: *)
+           let k_eol = search_end_of_line k_eob in
+           Netstream.move s k_eol;
+           true
+         end
+       with
+         any ->
+           (try stop p with _ -> ());
+           raise any
+       end in
+     stop p;
+     if continue then
+       (* Continue with next part: *)
+       parse_part()
+   in
+
+   (* Check whether s directly begins with a boundary: *)
+   if check_beginning_is_boundary() then begin
+     (* Move to the beginning of the next line: *)
+     let k_eol = search_end_of_line 0 in
+     Netstream.move s k_eol;
+     (* Begin with first part: *)
+     parse_part()
+   end
+   else begin
+     (* Search the first boundary. Only this search is guarded: a Not_found
+      * raised while scanning the parts must not be taken for "no boundary".
+      *)
+     match (try Some (search_first_boundary()) with Not_found -> None) with
+       None ->
+         (* No boundary at all: The body is empty. *)
+         ()
+     | Some k_eob ->
+         (* Move to the beginning of the next line: *)
+         let k_eol = search_end_of_line k_eob in
+         Netstream.move s k_eol;
+         (* Begin with first part: *)
+         parse_part()
+   end;
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/13 00:04:36 gerd
+ * Encoded_word -> EncodedWord
+ * Bugfixes.
+ *
+ * Revision 1.7 2000/08/07 00:25:14 gerd
+ * Implemented the new functions for structured field lexing.
+ *
+ * Revision 1.6 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.5 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.4 2000/05/16 22:30:14 gerd
+ * Added support for some types of malformed MIME messages.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/mimestring.mli b/helm/DEVEL/pxp/netstring/mimestring.mli
new file mode 100644
index 000000000..39634b59c
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/mimestring.mli
@@ -0,0 +1,683 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Collection of auxiliary functions to parse MIME headers *)
+(**********************************************************************)
+
+
+val scan_header :
+ ?unfold:bool ->
+ string -> start_pos:int -> end_pos:int ->
+ ((string * string) list * int)
+ (* let params, i2 = scan_header s i0 i1:
+ *
+ * DESCRIPTION
+ *
+ * Scans the MIME header that begins at position i0 in the string s
+ * and that must end somewhere before position i1. It is intended
+ * that in i1 the character position following the end of the body of the
+ * MIME message is passed.
+ * Returns the parameters of the header as (name,value) pairs (in
+ * params), and in i2 the position of the character following
+ * directly after the header (i.e. after the blank line separating
+ * the header from the body).
+ * The following normalizations have already been applied:
+ * - The names are all in lowercase
+ * - Newline characters (CR and LF) have been removed (unless
+ * ?unfold:false has been passed)
+ * - Whitespace at the beginning and at the end of values has been
+ * removed (unless ?unfold:false is specified)
+ * The rules of RFC 2047 have NOT been applied.
+ * The function fails if the header severely violates the header
+ * format. (Some minor deviations are tolerated, e.g. it is sufficient
+ * to separate lines by only LF instead of CRLF.)
+ *
+ * OPTIONS:
+ *
+ * unfold: If true (the default), folded lines are concatenated and
+ * returned as one line. This means that CR and LF characters are
+ * deleted and that whitespace at the beginning and the end of the
+ * string is removed.
+ * You may set ?unfold:false to locate individual characters in the
+ * parameter value exactly.
+ *
+ * ABOUT MIME MESSAGE FORMAT:
+ *
+ * This is the modern name for messages in "E-Mail format". Messages
+ * consist of a header and a body; the first empty line separates both
+ * parts. The header contains lines "param-name: param-value" where
+ * the param-name must begin on column 0 of the line, and the ":"
+ * separates the name and the value. So the format is roughly:
+ *
+ * param1-name: param1-value
+ * ...
+ * paramN-name: paramN-value
+ *
+ * body
+ *
+ * This function wants in i0 the position of the first character of
+ * param1-name in the string, and in i1 the position of the character
+ * following the body. It returns as i2 the position where the body
+ * begins. Furthermore, in 'params' all parameters are returned that
+ * exist in the header.
+ *
+ * DETAILS
+ *
+ * Note that parameter values are restricted; you cannot represent
+ * arbitrary strings. The following problems can arise:
+ * - Values cannot begin with whitespace characters, because there
+ * may be an arbitrary number of whitespaces between the ':' and the
+ * value.
+ * - Values (and names of parameters, too) must only be formed of
+ * 7 bit ASCII characters. (If this is not enough, the MIME standard
+ * knows the extension RFC 2047 that allows that header values may
+ * be composed of arbitrary characters of arbitrary character sets.)
+ * - Header values may be broken into several lines, the continuation
+ * lines must begin with whitespace characters. This means that values
+ * must not contain line breaks as semantical part of the value.
+ * And it may mean that ONE whitespace character is not distinguishable
+ * from SEVERAL whitespace characters.
+ * - Header lines must not be longer than 76 characters. Values that
+ * would result in longer lines must be broken into several lines.
+ * This means that you cannot represent strings that contain too few
+ * whitespace characters.
+ * - Some gateways pad the lines with spaces at the end of the lines.
+ *
+ * This implementation of a MIME scanner tolerates a number of
+ * deviations from the standard: long lines are not rejected; 8 bit
+ * values are accepted; lines may be ended only with LF instead of
+ * CRLF.
+ * Furthermore, header values are transformed:
+ * - leading and trailing spaces are always removed
+ * - CRs and LFs are deleted; it is guaranteed that there is at least
+ * one space or tab where CR/LFs are deleted.
+ * Last but not least, the names of the header values are converted
+ * to lowercase; MIME specifies that they are case-independent.
+ *
+ * COMPATIBILITY WITH THE STANDARD
+ *
+ * This function can parse all MIME headers that conform to RFC 822.
+ * But there may be still problems, as RFC 822 allows some crazy
+ * representations that are actually not used in practice.
+ * In particular, RFC 822 allows it to use backslashes to "indicate"
+ * that a CRLF sequence is semantically meant as line break. As this
+ * function normally deletes CRLFs, it is not possible to recognize such
+ * indicators in the result of the function.
+ *)
+
+(**********************************************************************)
+
+(* The following types and functions make it possible to build scanners
+ * for structured MIME values in a highly configurable way.
+ *
+ * WHAT ARE STRUCTURED VALUES?
+ *
+ * RFC 822 (together with some other RFCs) defines lexical rules
+ * how formal MIME header values should be divided up into tokens. Formal
+ * MIME headers are those headers that are formed according to some
+ * grammar, e.g. mail addresses or MIME types.
+ * Some of the characters separate phrases of the value; these are
+ * the "special" characters. For example, '@' is normally a special
+ * character for mail addresses, because it separates the user name
+ * from the domain name. RFC 822 defines a fixed set of special
+ * characters, but other RFCs use different sets. Because of this,
+ * the following functions allow it to configure the set of special characters.
+ * Every sequence of characters may be embraced by double quotes,
+ * which means that the sequence is meant as literal data item;
+ * special characters are not recognized inside a quoted string. You may
+ * use the backslash to insert any character (including double quotes)
+ * verbatim into the quoted string (e.g. "He said: \"Give it to me!\"").
+ * The sequence of a backslash character and another character is called
+ * a quoted pair.
+ * Structured values may contain comments. The beginning of a comment
+ * is indicated by '(', and the end by ')'. Comments may be nested.
+ * Comments may contain quoted pairs. A
+ * comment counts as if a space character were written instead of it.
+ * Control characters are the ASCII characters 0 to 31, and 127.
+ * RFC 822 demands that MIME headers are 7 bit ASCII strings. Because
+ * of this, this function also counts the characters 128 to 255 as
+ * control characters.
+ * Domain literals are strings embraced by '[' and ']'; such literals
+ * may contain quoted pairs. Today, domain literals are used to specify
+ * IP addresses.
+ * Every character sequence not falling in one of the above categories
+ * is an atom (a sequence of non-special and non-control characters).
+ * When recognized, atoms may be encoded in a character set different than
+ * US-ASCII; such atoms are called encoded words (see RFC 2047).
+ *
+ * EXTENDED INTERFACE:
+ *
+ * In order to scan a string containing a MIME value, you must first
+ * create a mime_scanner using the function create_mime_scanner.
+ * The scanner contains the reference to the scanned string, and a
+ * specification how the string is to be scanned. The specification
+ * consists of the lists 'specials' and 'scan_options'.
+ *
+ * The character list 'specials' specifies the set of special characters.
+ * These characters are returned as Special c token; the following additional
+ * rules apply:
+ *
+ * - Spaces:
+ * If ' ' in specials: A space character is returned as Special ' '.
+ * Note that there may also be an effect on how comments are returned
+ * (see below).
+ * If ' ' not in specials: Spaces are ignored.
+ *
+ * - Tabs, CRs, LFs:
+ * If '\t' in specials: A tab character is returned as Special '\t'.
+ * If '\t' not in specials: Tabs are ignored.
+ *
+ * If '\r' in specials: A CR character is returned as Special '\r'.
+ * If '\r' not in specials: CRs are ignored.
+ *
+ * If '\n' in specials: A LF character is returned as Special '\n'.
+ * If '\n' not in specials: LFs are ignored.
+ *
+ * - Comments:
+ * If '(' in specials: Comments are not recognized. The character '('
+ * is returned as Special '('.
+ * If '(' not in specials: Comments are recognized. How comments are
+ * returned, depends on the following:
+ * If Return_comments in scan_options: Outer comments are returned as
+ * Comment (note that inner comments count but
+ * are not returned as tokens)
+ * If otherwise ' ' in specials: Outer comments are returned as
+ * Special ' '
+ * Otherwise: Comments are recognized but ignored.
+ *
+ * - Quoted strings:
+ * If '"' in specials: Quoted strings are not recognized, and double quotes
+ * are returned as Special '"'.
+ * If '"' not in specials: Quoted strings are returned as QString tokens.
+ *
+ * - Domain literals:
+ * If '[' in specials: Domain literals are not recognized, and left brackets
+ * are returned as Special '['.
+ * If '[' not in specials: Domain literals are returned as DomainLiteral
+ * tokens.
+ *
+ * Note that the rule for domain literals is completely new in netstring-0.9.
+ * It may cause incompatibilities with previous versions if '[' is not
+ * special.
+ *
+ * The general rule for special characters: Every special character c is
+ * returned as Special c, and any additional scanning functionality
+ * for this character is turned off.
+ *
+ * If recognized, quoted strings are returned as QString s, where
+ * s is the string without the embracing quotes, and with already
+ * decoded quoted pairs.
+ *
+ * Control characters c are returned as Control c.
+ *
+ * If recognized, comments may either be returned as spaces (in the case
+ * you are not interested in the contents of comments), or as Comment tokens.
+ * The contents of comments are not further scanned; you must start a
+ * subscanner to analyze comments as structured values.
+ *
+ * If recognized, domain literals are returned as DomainLiteral s, where
+ * s is the literal without brackets, and with decoded quoted pairs.
+ *
+ * Atoms are returned as Atom s where s is a longest sequence of
+ * atomic characters (all characters which are neither special nor control
+ * characters nor delimiters for substructures). If the option
+ * Recognize_encoded_words is on, atoms which look like encoded words
+ * are returned as EncodedWord tokens. (Important note: Neither '?' nor
+ * '=' may be special, otherwise this functionality does not work.)
+ *
+ * After the mime_scanner has been created, you can scan the tokens by
+ * invoking scan_token which returns one token at a time, or by invoking
+ * scan_token_list which returns all following tokens.
+ *
+ * There are two token types: s_token is the base type and is intended to
+ * be used for pattern matching. s_extended_token is a wrapper that
+ * additionally contains information where the token occurs.
+ *
+ * SIMPLE INTERFACE
+ *
+ * Instead of creating a mime_scanner and calling the scan functions,
+ * you may also invoke scan_structured_value. This function returns the
+ * list of tokens directly; however, it is restricted to s_token.
+ *
+ * EXAMPLES
+ *
+ * scan_structured_value "user@domain.com" [ '@'; '.' ] []
+ * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user @ domain . com" [ '@'; '.' ] []
+ * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] []
+ * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ]
+ * [ Return_comments ]
+ * = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.';
+ * Atom "com" ]
+ *
+ * scan_structured_value "user (Do you know him?) @ domain . com"
+ * [ '@'; '.'; ' ' ] []
+ * = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
+ * Special ' '; Atom "domain";
+ * Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user (Do you know him?) @ domain . com"
+ * [ '@'; '.'; ' ' ] [ Return_comments ]
+ * = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@';
+ * Special ' '; Atom "domain";
+ * Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []
+ * = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
+ * Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" ['@'; '.'; '(']
+ * []
+ * = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
+ * Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] []
+ * = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
+ * Atom "com" ]
+ *
+ * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
+ * [ ] [ ]
+ * = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
+ *
+ * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
+ * [ ] [ Recognize_encoded_words ]
+ * = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ]
+ *
+ *)
+
+
+
+type s_token =
+ Atom of string
+ | EncodedWord of (string * string * string)
+ | QString of string
+ | Control of char
+ | Special of char
+ | DomainLiteral of string
+ | Comment
+ | End
+
+(* - Words are: Atom, EncodedWord, QString.
+ * - Atom s: The character sequence forming the atom is contained in s
+ * - EncodedWord(charset, encoding, encoded_string) means:
+ * * charset is the (uppercase) character set
+ * * encoding is either "Q" or "B"
+ * * encoded_string: contains the text of the word; the text is represented
+ * as octet string following the conventions for character set charset and
+ * then encoded either as "Q" or "B" string.
+ * - QString s: Here, s are the characters inside the double quotes after
+ * decoding any quoted pairs (backslash + character pairs)
+ * - Control c: The control character c
+ * - Special c: The special character c
+ * - DomainLiteral s: s contains the characters inside the brackets after
+ * decoding any quoted pairs
+ * - Comment: if the option Return_comments is specified, this token
+ * represents the whole comment.
+ * - End: Is returned after the last token
+ *)
+
+
+type s_option =
+ No_backslash_escaping
+ (* Do not handle backslashes in quoted string and comments as escape
+ * characters; backslashes are handled as normal characters.
+ * For example: "C:\dir\file" will be returned as
+ * QString "C:\dir\file", and not as QString "C:dirfile".
+ * - This is a common error in many MIME implementations.
+ *)
+ | Return_comments
+ (* Comments are returned as token Comment (unless '(' is included
+ * in the list of special characters, in which case comments are
+ * not recognized at all).
+ * You may get the exact location of the comment by applying
+ * get_pos and get_length to the extended token.
+ *)
+ | Recognize_encoded_words
+ (* Causes encoded words to be recognized and returned as
+ * EncodedWord(charset,encoding,content) instead of Atom.
+ *)
+
+type s_extended_token
+ (* An opaque type containing s_token plus:
+ * - where the token occurs
+ * - RFC-2047 access functions
+ *)
+
+val get_token : s_extended_token -> s_token
+ (* Return the s_token within the s_extended_token *)
+
+val get_decoded_word : s_extended_token -> string
+val get_charset : s_extended_token -> string
+ (* Return the decoded word (the contents of the word after decoding the
+ * "Q" or "B" representation), and the character set of the decoded word
+ * (uppercase).
+ * These functions do not only work for EncodedWord tokens:
+ * - Atom: Returns the atom without decoding it
+ * - QString: Returns the characters inside the double quotes, and
+ * decodes any quoted pairs (backslash + character)
+ * - Control: Returns the one-character string
+ * - Special: Returns the one-character string
+ * - DomainLiteral: Returns the characters inside the brackets, and
+ * decodes any quoted pairs
+ * - Comment: Returns ""
+ * The character set is "US-ASCII" for these tokens.
+ *)
+
+val get_pos : s_extended_token -> int
+ (* Return the byte position where the token starts in the string
+ * (the first byte has position 0)
+ *)
+
+val get_line : s_extended_token -> int
+ (* Return the line number where the token starts (numbering begins
+ * usually with 1)
+ *)
+
+val get_column : s_extended_token -> int
+ (* Return the column of the line where the token starts (first column
+ * is number 0)
+ *)
+
+val get_length : s_extended_token -> int
+ (* Return the length of the token in bytes *)
+
+val separates_adjacent_encoded_words : s_extended_token -> bool
+ (* True iff the current token is white space (Special ' ', Special '\t',
+ * Special '\r' or Special '\n') and the last non-white space token
+ * was EncodedWord and the next non-white space token will be
+ * EncodedWord.
+ * Such spaces do not count and must be ignored by any application.
+ *)
+
+
+type mime_scanner
+
+val create_mime_scanner :
+ specials:char list ->
+ scan_options:s_option list ->
+ ?pos:int ->
+ ?line:int ->
+ ?column:int ->
+ string ->
+ mime_scanner
+ (* Creates a new mime_scanner scanning the passed string.
+ * specials: The list of characters recognized as special characters.
+ * scan_options: The list of global options modifying the behaviour
+ * of the scanner
+ * pos: The position of the byte where the scanner starts in the
+ * passed string. Defaults to 0.
+ * line: The line number of this byte. Defaults to 1.
+ * column: The column number of this byte. Defaults to 0.
+ *
+ * The optional parameters pos, line, column are intentionally after
+ * scan_options and before the string argument, so you can specify
+ * scanners by partially applying arguments to create_mime_scanner
+ * which are not yet connected with a particular string:
+ * let my_scanner_spec = create_mime_scanner my_specials my_options in
+ * ...
+ * let my_scanner = my_scanner_spec my_string in
+ * ...
+ *)
+
+val get_pos_of_scanner : mime_scanner -> int
+val get_line_of_scanner : mime_scanner -> int
+val get_column_of_scanner : mime_scanner -> int
+ (* Return the current position, line, and column of a mime_scanner.
+ * The primary purpose of these functions is to simplify switching
+ * from one mime_scanner to another within a string:
+ *
+ * let scanner1 = create_mime_scanner ... s in
+ * ... now scanning some tokens from s using scanner1 ...
+ * let scanner2 = create_mime_scanner ...
+ * ?pos:(get_pos_of_scanner scanner1)
+ * ?line:(get_line_of_scanner scanner1)
+ * ?column:(get_column_of_scanner scanner1)
+ * s in
+ * ... scanning more tokens from s using scanner2 ...
+ *
+ * RESTRICTION: These functions are not available if the option
+ * Recognize_encoded_words is on. The reason is that this option
+ * enables look-ahead scanning; please use the location of the last
+ * scanned token instead.
+ * It is currently not clear whether a better implementation is needed
+ * (costs a bit more time).
+ *
+ * Note: To improve the performance of switching, it is recommended to
+ * create scanner specs in advance (see the example my_scanner_spec
+ * above).
+ *)
+
+val scan_token : mime_scanner -> (s_extended_token * s_token)
+ (* Returns the next token, or End if there are no more tokens. *)
+
+val scan_token_list : mime_scanner -> (s_extended_token * s_token) list
+ (* Returns all following tokens as a list (excluding End) *)
+
+val scan_structured_value : string -> char list -> s_option list -> s_token list
+ (* This function is included for backwards compatibility, and for all
+ * cases not requiring extended tokens.
+ *
+ * It scans the passed string according to the list of special characters
+ * and the list of options, and returns the list of all tokens.
+ *)
+
+val specials_rfc822 : char list
+val specials_rfc2045 : char list
+ (* The sets of special characters defined by the RFCs 822 and 2045.
+ *
+ * CHANGE in netstring-0.9: '[' and ']' are no longer special because
+ * there is now support for domain literals.
+ * '?' and '=' are not special in the rfc2045 version because there is
+ * already support for encoded words.
+ *)
+
+
+(**********************************************************************)
+
+(* Widely used scanners: *)
+
+
+val scan_encoded_text_value : string -> s_extended_token list
+ (* Scans a "text" value. The returned token list contains only
+ * Special, Atom and EncodedWord tokens.
+ * Spaces, TABs, CRs, LFs are returned unless
+ * they occur between adjacent encoded words in which case
+ * they are ignored.
+ *)
+
+
+val scan_value_with_parameters : string -> s_option list ->
+ (string * (string * string) list)
+ (* let name, params = scan_value_with_parameters s options:
+ * Scans phrases like
+ * name ; p1=v1 ; p2=v2 ; ...
+ * The scan is done with the set of special characters [';', '='].
+ *)
+
+val scan_mime_type : string -> s_option list ->
+ (string * (string * string) list)
+ (* let name, params = scan_mime_type s options:
+ * Scans MIME types like
+ * text/plain; charset=iso-8859-1
+ * The name of the type and the names of the parameters are converted
+ * to lower case.
+ *)
+
+
+(**********************************************************************)
+
+(* Scanners for MIME bodies *)
+
+val scan_multipart_body : string -> start_pos:int -> end_pos:int ->
+ boundary:string ->
+ ((string * string) list * string) list
+ (* let [params1, value1; params2, value2; ...]
+ * = scan_multipart_body s i0 i1 b
+ *
+ * Scans the string s that is the body of a multipart message.
+ * The multipart message begins at position i0 in s, and i1 is the position
+ * of the character following the message. In b the boundary string
+ * must be passed (this is the "boundary" parameter of the multipart
+ * MIME type, e.g. multipart/mixed;boundary="some string" ).
+ * The return value is the list of the parts, where each part
+ * is returned as pair (params, value). The left component params
+ * is the list of name/value pairs of the header of the part. The
+ * right component is the RAW content of the part, i.e. if the part
+ * is encoded ("content-transfer-encoding"), the content is returned
+ * in the encoded representation. The caller must himself decode
+ * the content.
+ * The material before the first boundary and after the last
+ * boundary is not returned.
+ *
+ * MULTIPART MESSAGES
+ *
+ * The MIME standard defines a way to group several message parts to
+ * a larger message (for E-Mails this technique is known as "attaching"
+ * files to messages); these are the so-called multipart messages.
+ * Such messages are recognized by the major type string "multipart",
+ * e.g. multipart/mixed or multipart/form-data. Multipart types MUST
+ * have a boundary parameter because boundaries are essential for the
+ * representation.
+ * Multipart messages have a format like
+ *
+ * ...Header...
+ * Content-type: multipart/xyz; boundary="abc"
+ * ...Header...
+ *
+ * Body begins here ("prologue")
+ * --abc
+ * ...Header part 1...
+ *
+ * ...Body part 1...
+ * --abc
+ * ...Header part 2...
+ *
+ *
+ * ...Body part 2
+ * --abc
+ * ...
+ * --abc--
+ * Epilogue
+ *
+ * The parts are separated by boundary lines which begin with "--" and
+ * the string passed as boundary parameter. (Note that there may follow
+ * arbitrary text on boundary lines after "--abc".) The boundary is
+ * chosen such that it does not occur as prefix of any line of the
+ * inner parts of the message.
+ * The parts are again MIME messages, with header and body. Note
+ * that it is explicitly allowed that the parts are even multipart
+ * messages.
+ * The texts before the first boundary and after the last boundary
+ * are ignored.
+ * Note that multipart messages as a whole MUST NOT be encoded.
+ * Only the PARTS of the messages may be encoded (if they are not
+ * multipart messages themselves).
+ *
+ * Please read RFC 2046 if you want to know the gory details of this
+ * brain-dead format.
+ *)
+
+val scan_multipart_body_and_decode : string -> start_pos:int -> end_pos:int ->
+ boundary:string ->
+ ((string * string) list * string) list
+ (* Same as scan_multipart_body, but decodes the bodies of the parts
+ * if they are encoded using the methods "base64" or "quoted printable".
+ * Fails, if an unknown encoding is used.
+ *)
+
+val scan_multipart_body_from_netstream
+ : Netstream.t ->
+ boundary:string ->
+ create:((string * string) list -> 'a) ->
+ add:('a -> Netstream.t -> int -> int -> unit) ->
+ stop:('a -> unit) ->
+ unit
+ (* scan_multipart_body_from_netstream s b create add stop:
+ *
+ * Reads the MIME message from the netstream s block by block. The
+ * parts are delimited by the boundary b.
+ *
+ * Once a new part is detected and begins, the function 'create' is
+ * called with the MIME header as argument. The result p of this function
+ * may be of any type.
+ *
+ * For every chunk of the part that is being read, the function 'add'
+ * is invoked: add p s k n.
+ * Here, p is the value returned by the 'create' invocation for the
+ * current part. s is the netstream. The current window of s contains
+ * the read chunk completely; the chunk begins at position k of the
+ * window (relative to the beginning of the window) and has a length
+ * of n bytes.
+ *
+ * When the part has been fully read, the function 'stop' is
+ * called with p as argument.
+ *
+ * That means, for every part the following is executed:
+ * - let p = create h
+ * - add p s k1 n1
+ * - add p s k2 n2
+ * - ...
+ * - add p s kN nN
+ * - stop p
+ *
+ * IMPORTANT PRECONDITION:
+ * - The block size of the netstream s must be at least
+ * String.length b + 3
+ *
+ * EXCEPTIONS:
+ * - Exceptions can happen because of ill-formed input, and within
+ * the callbacks of the functions 'create', 'add', 'stop'.
+ * - If the exception happens while part p is being read, and the
+ * 'create' function has already been called (successfully), the
+ * 'stop' function is also called (you have the chance to close files).
+ *)
+
+
+(* THREAD-SAFETY:
+ * The functions are thread-safe as long as the threads do not share
+ * values.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/13 00:04:36 gerd
+ * Encoded_word -> EncodedWord
+ * Bugfixes.
+ *
+ * Revision 1.7 2000/08/07 00:25:00 gerd
+ * Major update of the interface for structured field lexing.
+ *
+ * Revision 1.6 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.5 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.4 2000/05/16 22:29:12 gerd
+ * New "option" arguments specifying the level of MIME
+ * compatibility.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netbuffer.ml b/helm/DEVEL/pxp/netstring/netbuffer.ml
new file mode 100644
index 000000000..d6fc40ff7
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netbuffer.ml
@@ -0,0 +1,145 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+type t =
+ { mutable buffer : string; (* backing string; may be longer than the contents *)
+ mutable length : int; (* logical length: only the first 'length' bytes of 'buffer' are contents *)
+ }
+
+(* To help the garbage collector:
+ * The 'buffer' has a minimum length of 31 bytes. This minimum can still
+ * be stored in the minor heap.
+ * The 'buffer' has a length which is always near a multiple of two. This
+ * limits the number of different bucket sizes, and simplifies reallocation
+ * of freed memory.
+ *)
+
+(* Optimal string length:
+ * Every string takes: 1 word for the header, enough words for the
+ * contents + 1 Null byte (for C compatibility).
+ * If the buffer grows, it is best to use a new string length such
+ * that the number of words is exactly twice as large as for the previous
+ * string.
+ * n: length of the previous string in bytes
+ * w: storage size of the previous string in words
+ * n': length of the new string in bytes
+ * w' = 2*w: storage size of the new string in words
+ *
+ * w = (n+1) / word_length + 1
+ * [it is assumed that (n+1) is always a multiple of word_length]
+ *
+ * n' = (2*w - 1) * word_length - 1
+ *
+ * n' = [2 * ( [n+1] / word_length + 1) - 1] * word_length - 1
+ * = ...
+ * = (2*n + 2) + word_length - 1
+ * = 2 * n + word_length + 1
+ *
+ * n'+1 is again a multiple of word_length:
+ * n'+1 = 2*n + 2 + word_length
+ * = 2*(n+1) + word_length
+ * = a multiple of word_length because n+1 is a multiple of word_length
+ *)
+
+let word_length = Sys.word_size / 8 (* in bytes; the growth step used in this file is 2*n + word_length + 1 *)
+
+let create n = (* allocates at least 31 bytes; the logical length starts at zero *)
+  { length = 0; buffer = String.create (max n 31) }
+
+let contents nb = (* fresh copy of the logical contents *)
+  let { buffer = data; length = used } = nb in String.sub data 0 used
+
+let sub b ~pos:start ~len:count =
+  (* The requested range must end within the logical contents *)
+  if start + count <= b.length then String.sub b.buffer start count
+  else raise (Invalid_argument "Netbuffer.sub")
+
+let unsafe_buffer nb = (* the backing string itself, not a copy *)
+  nb.buffer
+
+let length nb = (* logical length, not the allocated size *)
+  nb.length
+
+let add_string b s =
+  (* Appends s, enlarging the backing string first when it is too small *)
+  let extra = String.length s in
+  let needed = b.length + extra in
+  let capacity = String.length b.buffer in
+  if needed > capacity then begin
+    (* Repeat the growth step until the new size is sufficient *)
+    let rec grow size = if size >= needed then size else grow (2*size + word_length + 1) in
+    let bigger = String.create (grow capacity) in
+    String.blit b.buffer 0 bigger 0 b.length;
+    b.buffer <- bigger
+  end;
+  String.blit s 0 b.buffer b.length extra; b.length <- needed
+
+let add_sub_string b s ~pos:from ~len:count =
+  (* Appends count bytes of s, starting at from, without an extra copy *)
+  let needed = b.length + count in
+  let capacity = String.length b.buffer in
+  if needed > capacity then begin
+    let size = ref capacity in
+    while !size < needed do size := 2 * !size + word_length + 1 done;
+    let bigger = String.create !size in
+    String.blit b.buffer 0 bigger 0 b.length;
+    b.buffer <- bigger
+  end;
+  String.blit s from b.buffer b.length count; b.length <- needed
+
+let delete b ~pos:k ~len:l =
+  (* Deletes the l bytes at position k of the contents of b, moving the
+   * rest down. Only the logical contents count, not the backing string. *)
+  if k < 0 || l < 0 || k + l > b.length then
+    raise (Invalid_argument "Netbuffer.delete");
+  if l > 0 then String.blit b.buffer (k+l) b.buffer k (b.length - l - k);
+  b.length <- b.length - l
+
+let try_shrinking b =
+  (* Reallocates when less than half of the backing string is in use *)
+  let used = b.length in
+  if used < String.length b.buffer / 2 then begin
+    (* Smallest size reachable from 31 by growth steps that holds 'used' *)
+    let size = ref 31 in
+    while !size < used do size := 2 * !size + word_length + 1 done;
+    let smaller = String.create !size in
+    String.blit b.buffer 0 smaller 0 used; b.buffer <- smaller
+  end
+
+let clear b = (* removes the whole logical contents *)
+  delete b ~pos:0 ~len:b.length
+
+let index_from b k c =
+  (* Searches the backing string, then rejects hits beyond the contents *)
+  if k > b.length then raise (Invalid_argument "Netbuffer.index_from")
+  else begin
+    let hit = String.index_from b.buffer k c in
+    if hit < b.length then hit else raise Not_found end
+
+let print_buffer b =
+  (* Toploop printer: shows logical length / allocated size *)
+  Format.printf "<NETBUFFER: %d/%d>"
+    b.length
+    (String.length b.buffer)
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netbuffer.mli b/helm/DEVEL/pxp/netstring/netbuffer.mli
new file mode 100644
index 000000000..0ecd61e6a
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netbuffer.mli
@@ -0,0 +1,93 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* A Netbuffer.t is a buffer that can grow and shrink dynamically. *)
+
+type t
+
+val create : int -> t
+ (* Creates a netbuffer which allocates initially this number of bytes.
+ * The logical length is zero.
+ *)
+
+val contents : t -> string
+ (* Returns the contents of the buffer as fresh string. *)
+
+val sub : t -> pos:int -> len:int -> string
+ (* sub nb k n: returns the n characters starting at position k from
+ * netbuffer nb as fresh string
+ *)
+
+val length : t -> int
+ (* Returns the logical length of the buffer *)
+
+val add_string : t -> string -> unit
+ (* add_string nb s: Adds a copy of the string s to the logical end of
+ * the netbuffer nb. If necessary, the nb grows.
+ *)
+
+val add_sub_string : t -> string -> pos:int -> len:int -> unit
+ (* add_sub_string nb s k n: Adds the substring of s starting at position
+ * k with length n to the logical end of the netbuffer nb. If necessary,
+ * the nb grows.
+ * This is semantically the same as
+ * add_string nb (String.sub s k n), but the extra copy is avoided.
+ *)
+
+val delete : t -> pos:int -> len:int -> unit
+ (* delete nb k n: Deletes the n bytes at position k of netbuffer nb
+ * in-place.
+ * The netbuffer does not shrink!
+ *)
+
+val clear : t -> unit
+ (* deletes all contents from the buffer. As 'delete', the netbuffer does
+ * not shrink.
+ *)
+
+val try_shrinking : t -> unit
+ (* try_shrinking nb: If the length of the buffer is less than half of
+ * the allocated space, the netbuffer is reallocated in order to save
+ * memory.
+ *)
+
+val index_from : t -> int -> char -> int
+ (* index_from nb k c: Searches the character c in the netbuffer beginning
+ * at position k. If found, the position of the left-most occurrence is
+ * returned. Otherwise, Not_found is raised.
+ *)
+
+val unsafe_buffer : t -> string
+ (* WARNING! This is a low-level function!
+ * Returns the current string that internally holds the buffer.
+ * The byte positions 0 to length - 1 actually store the contents of
+ * the buffer. You can directly read and modify the buffer. Note that
+ * there is no protection if you read or write positions beyond the
+ * length of the buffer.
+ *)
+
+val print_buffer : t -> unit
+ (* For the toploop *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netconversion.ml b/helm/DEVEL/pxp/netstring/netconversion.ml
new file mode 100644
index 000000000..e740654ad
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netconversion.ml
@@ -0,0 +1,864 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code (* raised by the read_* functions of this file on invalid input *)
+
+
+type encoding =
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_java (* UTF-8 variant "UTF-8-JAVA"; a plain zero byte is rejected on input *)
+ | `Enc_utf16 (* UTF-16 with unspecified endianess (restricted usage) *)
+ | `Enc_utf16_le (* UTF-16 little endian *)
+ | `Enc_utf16_be (* UTF-16 big endian *)
+ | `Enc_usascii (* US-ASCII (only 7 bit) *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ | `Enc_iso88592 (* ISO-8859-2 *)
+ | `Enc_iso88593 (* ISO-8859-3 *)
+ | `Enc_iso88594 (* ISO-8859-4 *)
+ | `Enc_iso88595 (* ISO-8859-5 *)
+ | `Enc_iso88596 (* ISO-8859-6 *)
+ | `Enc_iso88597 (* ISO-8859-7 *)
+ | `Enc_iso88598 (* ISO-8859-8 *)
+ | `Enc_iso88599 (* ISO-8859-9 *)
+ | `Enc_iso885910 (* ISO-8859-10 *)
+ | `Enc_iso885913 (* ISO-8859-13 *)
+ | `Enc_iso885914 (* ISO-8859-14 *)
+ | `Enc_iso885915 (* ISO-8859-15 *)
+ | `Enc_koi8r (* KOI8-R *)
+ | `Enc_jis0201 (* JIS-0201 *)
+ (* Microsoft: *)
+ | `Enc_windows1250 (* WINDOWS-1250 *)
+ | `Enc_windows1251 (* WINDOWS-1251 *)
+ | `Enc_windows1252 (* WINDOWS-1252 *)
+ | `Enc_windows1253 (* WINDOWS-1253 *)
+ | `Enc_windows1254 (* WINDOWS-1254 *)
+ | `Enc_windows1255 (* WINDOWS-1255 *)
+ | `Enc_windows1256 (* WINDOWS-1256 *)
+ | `Enc_windows1257 (* WINDOWS-1257 *)
+ | `Enc_windows1258 (* WINDOWS-1258 *)
+ (* IBM, ASCII-based: *)
+ | `Enc_cp437
+ | `Enc_cp737
+ | `Enc_cp775
+ | `Enc_cp850
+ | `Enc_cp852
+ | `Enc_cp855
+ | `Enc_cp856
+ | `Enc_cp857
+ | `Enc_cp860
+ | `Enc_cp861
+ | `Enc_cp862
+ | `Enc_cp863
+ | `Enc_cp864
+ | `Enc_cp865
+ | `Enc_cp866
+ | `Enc_cp869
+ | `Enc_cp874
+ | `Enc_cp1006
+ (* IBM, EBCDIC-based: *)
+ | `Enc_cp037
+ | `Enc_cp424
+ | `Enc_cp500
+ | `Enc_cp875
+ | `Enc_cp1026
+ (* Adobe: *)
+ | `Enc_adobe_standard_encoding
+ | `Enc_adobe_symbol_encoding
+ | `Enc_adobe_zapf_dingbats_encoding
+ (* Apple: *)
+ | `Enc_macroman
+
+ ]
+;;
+
+
+let norm_enc_name e =
+  (* Drops every '-', '_' and '.' from e and converts the rest to upper case *)
+  let n = String.length e in
+  let kept = String.create n in
+  let used = ref 0 in
+  for i = 0 to n - 1 do
+    match e.[i] with
+        ('-'|'_'|'.') -> ()
+      | c ->
+          kept.[!used] <- c;
+          incr used
+  done;
+  String.uppercase (String.sub kept 0 !used)
+;;
+
+
+let encoding_of_string e = (* the lookup ignores case and the characters '-', '_', '.' *)
+ match norm_enc_name e with
+ ("UTF16"|"UCS2"|"ISO10646UCS2") -> `Enc_utf16
+ | "UTF16BE" -> `Enc_utf16_be
+ | "UTF16LE" -> `Enc_utf16_le
+ | "UTF8" -> `Enc_utf8
+ | ("UTF8JAVA"|"JAVA") -> `Enc_java
+ | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii
+ | ("ISO88591"|"LATIN1"|"IBM819"|"CP819") -> `Enc_iso88591
+ | ("ISO88592"|"LATIN2") -> `Enc_iso88592
+ | ("ISO88593"|"LATIN3") -> `Enc_iso88593
+ | ("ISO88594"|"LATIN4") -> `Enc_iso88594
+ | ("ISO88595"|"CYRILLIC") -> `Enc_iso88595
+ | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708") -> `Enc_iso88596
+ | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597
+ | ("ISO88598"|"HEBREW") -> `Enc_iso88598
+ | ("ISO88599"|"LATIN5") -> `Enc_iso88599
+ | ("ISO885910"|"LATIN6") -> `Enc_iso885910
+ | "ISO885913" -> `Enc_iso885913
+ | "ISO885914" -> `Enc_iso885914
+ | "ISO885915" -> `Enc_iso885915
+ | "KOI8R" -> `Enc_koi8r
+ | ("JIS0201"|"JISX0201"|"X0201") -> `Enc_jis0201
+
+ | "WINDOWS1250" -> `Enc_windows1250
+ | "WINDOWS1251" -> `Enc_windows1251
+ | "WINDOWS1252" -> `Enc_windows1252
+ | "WINDOWS1253" -> `Enc_windows1253
+ | "WINDOWS1254" -> `Enc_windows1254
+ | "WINDOWS1255" -> `Enc_windows1255
+ | "WINDOWS1256" -> `Enc_windows1256
+ | "WINDOWS1257" -> `Enc_windows1257
+ | "WINDOWS1258" -> `Enc_windows1258
+
+ | ("CP437"|"IBM437") -> `Enc_cp437
+ | ("CP737"|"IBM737") -> `Enc_cp737
+ | ("CP775"|"IBM775") -> `Enc_cp775
+ | ("CP850"|"IBM850") -> `Enc_cp850
+ | ("CP852"|"IBM852") -> `Enc_cp852
+ | ("CP855"|"IBM855") -> `Enc_cp855
+ | ("CP856"|"IBM856") -> `Enc_cp856
+ | ("CP857"|"IBM857") -> `Enc_cp857
+ | ("CP860"|"IBM860") -> `Enc_cp860
+ | ("CP861"|"IBM861") -> `Enc_cp861
+ | ("CP862"|"IBM862") -> `Enc_cp862
+ | ("CP863"|"IBM863") -> `Enc_cp863
+ | ("CP864"|"IBM864") -> `Enc_cp864
+ | ("CP865"|"IBM865") -> `Enc_cp865
+ | ("CP866"|"IBM866") -> `Enc_cp866
+ | ("CP869"|"IBM869") -> `Enc_cp869
+ | ("CP874"|"IBM874") -> `Enc_cp874
+ | ("CP1006"|"IBM1006") -> `Enc_cp1006
+
+ | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"|
+ "EBCDICCPNL") -> `Enc_cp037
+ | ("CP424"|"IBM424"|"EBCDICCPHE") -> `Enc_cp424
+ | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH") -> `Enc_cp500
+ | ("CP875"|"IBM875") -> `Enc_cp875
+ | ("CP1026"|"IBM1026") -> `Enc_cp1026
+
+ | "ADOBESTANDARDENCODING" -> `Enc_adobe_standard_encoding
+ | "ADOBESYMBOLENCODING" -> `Enc_adobe_symbol_encoding
+ | "ADOBEZAPFDINGBATSENCODING" -> `Enc_adobe_zapf_dingbats_encoding
+
+ | "MACINTOSH" -> `Enc_macroman
+
+ | _ -> (* neither a known name nor a known alias *)
+ failwith "Netconversion.encoding_of_string: unknown encoding"
+;;
+
+
+let string_of_encoding (e : encoding) =
+  (* Returns the "preferred MIME name" where one exists (see IANA); every result is accepted by encoding_of_string. *)
+  match e with
+      `Enc_utf16 -> "UTF-16"
+    | `Enc_utf16_be -> "UTF-16BE"
+    | `Enc_utf16_le -> "UTF-16LE"
+    | `Enc_utf8 -> "UTF-8"
+    | `Enc_java -> "UTF-8-JAVA"
+    | `Enc_usascii -> "US-ASCII"
+    | `Enc_iso88591 -> "ISO-8859-1"
+    | `Enc_iso88592 -> "ISO-8859-2"
+    | `Enc_iso88593 -> "ISO-8859-3"
+    | `Enc_iso88594 -> "ISO-8859-4"
+    | `Enc_iso88595 -> "ISO-8859-5"
+    | `Enc_iso88596 -> "ISO-8859-6"
+    | `Enc_iso88597 -> "ISO-8859-7"
+    | `Enc_iso88598 -> "ISO-8859-8"
+    | `Enc_iso88599 -> "ISO-8859-9"
+    | `Enc_iso885910 -> "ISO-8859-10"
+    | `Enc_iso885913 -> "ISO-8859-13"
+    | `Enc_iso885914 -> "ISO-8859-14"
+    | `Enc_iso885915 -> "ISO-8859-15"
+    | `Enc_koi8r -> "KOI8-R"
+    | `Enc_jis0201 -> "JIS_X0201"
+    | `Enc_windows1250 -> "WINDOWS-1250"
+    | `Enc_windows1251 -> "WINDOWS-1251"
+    | `Enc_windows1252 -> "WINDOWS-1252"
+    | `Enc_windows1253 -> "WINDOWS-1253"
+    | `Enc_windows1254 -> "WINDOWS-1254"
+    | `Enc_windows1255 -> "WINDOWS-1255"
+    | `Enc_windows1256 -> "WINDOWS-1256"
+    | `Enc_windows1257 -> "WINDOWS-1257"
+    | `Enc_windows1258 -> "WINDOWS-1258"
+    | `Enc_cp437 -> "CP437"
+    | `Enc_cp737 -> "CP737"
+    | `Enc_cp775 -> "CP775"
+    | `Enc_cp850 -> "CP850"
+    | `Enc_cp852 -> "CP852"
+    | `Enc_cp855 -> "CP855"
+    | `Enc_cp856 -> "CP856"
+    | `Enc_cp857 -> "CP857"
+    | `Enc_cp860 -> "CP860"
+    | `Enc_cp861 -> "CP861"
+    | `Enc_cp862 -> "CP862"
+    | `Enc_cp863 -> "CP863"
+    | `Enc_cp864 -> "CP864"
+    | `Enc_cp865 -> "CP865"
+    | `Enc_cp866 -> "CP866"
+    | `Enc_cp869 -> "CP869"
+    | `Enc_cp874 -> "CP874"
+    | `Enc_cp1006 -> "CP1006"
+    | `Enc_cp037 -> "CP037"
+    | `Enc_cp424 -> "CP424"
+    | `Enc_cp500 -> "CP500"
+    | `Enc_cp875 -> "CP875"
+    | `Enc_cp1026 -> "CP1026"
+    | `Enc_adobe_standard_encoding -> "ADOBE-STANDARD-ENCODING"
+    | `Enc_adobe_symbol_encoding -> "ADOBE-SYMBOL-ENCODING"
+    | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING"
+    | `Enc_macroman -> "MACINTOSH"
+;;
+
+
+let read_iso88591 write s_in p_in l_in =
+  (* Passes each input byte unchanged to write as a code point and stops
+   * as soon as write returns a negative value. *)
+  let rec loop consumed written count =
+    if consumed >= l_in then
+      (consumed, written, `Enc_iso88591)
+    else begin
+      let code = Char.code s_in.[p_in + consumed] in
+      let n = write code written count in
+      if n >= 0 then loop (consumed + 1) (written + n) (count + 1)
+      else (consumed, written, `Enc_iso88591)
+    end
+  in
+  loop 0 0 0
+;;
+
+
+let read_usascii write s_in p_in l_in =
+  (* Passes each input byte to write as a code point; bytes >= 0x80 are
+   * rejected with Malformed_code. Stops as soon as write returns < 0. *)
+  let rec loop consumed written count =
+    if consumed >= l_in then
+      (consumed, written, `Enc_usascii)
+    else begin
+      let code = Char.code s_in.[p_in + consumed] in
+      if code >= 0x80 then raise Malformed_code;
+      let n = write code written count in
+      if n >= 0 then loop (consumed + 1) (written + n) (count + 1)
+      else (consumed, written, `Enc_usascii)
+    end
+  in
+  loop 0 0 0
+;;
+
+
+let read_8bit m_to_unicode enc write s_in p_in l_in =
+  (* Translates each input byte through the table m_to_unicode before
+   * handing it to write; negative table entries raise Malformed_code.
+   * NOTE(review): unsafe_get presumes the table has 256 entries. *)
+  let rec loop consumed written count =
+    if consumed >= l_in then
+      (consumed, written, enc)
+    else begin
+      let code = Array.unsafe_get m_to_unicode (Char.code s_in.[p_in + consumed]) in
+      if code < 0 then raise Malformed_code;
+      let n = write code written count in
+      if n >= 0 then loop (consumed + 1) (written + n) (count + 1)
+      else (consumed, written, enc)
+    end
+  in
+  loop 0 0 0
+;;
+
+
+let read_utf8 is_java write s_in p_in l_in =
+  (* Decodes UTF-8 from s_in (l_in bytes starting at p_in) and passes every
+   * code point to write. With is_java, NUL must be coded as 0xc0 0x80 and
+   * a plain zero byte is rejected. Lead bytes are 0xc0 and above; a stray
+   * continuation byte (0x80..0xbf) in lead position is malformed. *)
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let n_out, n_in =
+        match s_in.[p_in + k_in] with
+            '\000' ->
+              if is_java then raise Malformed_code;
+              (write 0 k_out c_out, 1)
+          | ('\001'..'\127' as c) ->
+              (write (Char.code c) k_out c_out, 1)
+          | ('\192'..'\223' as c) ->
+              if k_in + 1 >= l_in then (-1, 0)
+              else begin
+                let n1 = Char.code c in
+                let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+                if is_java && (n1 = 0xc0 && n2 = 0x80) then
+                  (* The two-byte NUL that write_utf8 emits in Java mode *)
+                  (write 0 k_out c_out, 2)
+                else begin
+                  if n2 < 128 or n2 > 191 then raise Malformed_code;
+                  let p = ((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) in
+                  if p < 128 then raise Malformed_code;
+                  (write p k_out c_out, 2)
+                end
+              end
+          | ('\224'..'\239' as c) ->
+              if k_in + 2 >= l_in then (-1, 0)
+              else begin
+                let n1 = Char.code c in
+                let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+                let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+                if n2 < 128 or n2 > 191 then raise Malformed_code;
+                if n3 < 128 or n3 > 191 then raise Malformed_code;
+                let p =
+                  ((n1 land 0b1111) lsl 12) lor
+                  ((n2 land 0b111111) lsl 6) lor
+                  (n3 land 0b111111)
+                in
+                if p < 0x800 then raise Malformed_code;
+                if (p >= 0xd800 && p < 0xe000) then
+                  (* Surrogate pairs are not supported in UTF-8 *)
+                  raise Malformed_code;
+                if (p >= 0xfffe && p <= 0xffff) then raise Malformed_code;
+                (write p k_out c_out, 3)
+              end
+          | ('\240'..'\247' as c) ->
+              if k_in + 3 >= l_in then (-1, 0)
+              else begin
+                let n1 = Char.code c in
+                let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+                let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+                let n4 = Char.code (s_in.[p_in + k_in + 3]) in
+                if n2 < 128 or n2 > 191 then raise Malformed_code;
+                if n3 < 128 or n3 > 191 then raise Malformed_code;
+                if n4 < 128 or n4 > 191 then raise Malformed_code;
+                let p = ((n1 land 0b111) lsl 18) lor
+                        ((n2 land 0b111111) lsl 12) lor
+                        ((n3 land 0b111111) lsl 6) lor
+                        (n4 land 0b111111)
+                in
+                if p < 0x10000 then raise Malformed_code;
+                if p >= 0x110000 then
+                  (* These code points are not supported. *)
+                  raise Malformed_code;
+                (write p k_out c_out, 4)
+              end
+          | _ ->
+              (* Continuation byte in lead position, or lead byte >= 0xf8 *)
+              raise Malformed_code
+      in
+      (* n_out: number of written bytes; -1 means out buf is full
+       * n_in: number of read bytes; 0 means end of in buf reached
+       * n_in = 0 implies n_out = -1 *)
+      if n_out < 0 then
+        (k_in, k_out, `Enc_utf8)
+      else
+        scan (k_in + n_in) (k_out + n_out) (c_out + 1)
+    end
+    else
+      (k_in, k_out, `Enc_utf8)
+  in
+  scan 0 0 0
+;;
+
+
+let surrogate_offset = 0x10000 - (0xD800 lsl 10) - 0xDC00;; (* added to (high lsl 10) + low to obtain the code point *)
+
+let read_utf16_le k_in_0 write s_in p_in l_in = (* k_in_0: input offset at which scanning starts *)
+ let rec scan k_in k_out c_out =
+ if k_in + 1 < l_in then begin
+ let p = (Char.code s_in.[p_in + k_in]) lor ((Char.code s_in.[p_in + k_in + 1]) lsl 8) in
+ (* p: the 16 bit unit at k_in, low byte first *)
+ if p >= 0xd800 & p < 0xe000 then begin
+ (* p lies in the surrogate range; a complete pair takes 4 bytes. *)
+ if k_in + 3 < l_in then begin
+ if p <= 0xdbff then begin
+ let q = (Char.code s_in.[p_in + k_in + 2 ]) lor
+ ((Char.code s_in.[p_in + k_in + 3]) lsl 8) in
+ if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+ let eff_p = (p lsl 10) + q + surrogate_offset in
+ let n = write eff_p k_out c_out in
+ if n < 0 then
+ k_in, k_out, `Enc_utf16_le
+ else
+ scan (k_in + 4) (k_out + n) (c_out + 1)
+ end
+ else
+ (* Malformed pair (p is a low surrogate in leading position): *)
+ raise Malformed_code;
+ end
+ else
+ (* Incomplete pair. NOTE(review): a lone low surrogate in the last 2 bytes also ends up here: *)
+ k_in, k_out, `Enc_utf16_le
+ end
+
+ else
+ if p = 0xfffe then
+ (* Big endian byte order mark: It is illegal here *)
+ raise Malformed_code
+ else begin
+ (* A regular code point *)
+ let n = write p k_out c_out in
+ if n < 0 then
+ k_in, k_out, `Enc_utf16_le
+ else
+ scan (k_in + 2) (k_out + n) (c_out + 1)
+ end
+ end
+ else
+ (* Incomplete character (fewer than 2 bytes left): *)
+ k_in, k_out, `Enc_utf16_le
+ in
+ scan k_in_0 0 0
+;;
+
+
+let read_utf16_be k_in_0 write s_in p_in l_in = (* k_in_0: input offset at which scanning starts *)
+ let rec scan k_in k_out c_out =
+ if k_in + 1 < l_in then begin
+ let p = (Char.code s_in.[p_in + k_in + 1]) lor ((Char.code s_in.[p_in + k_in]) lsl 8) in
+ (* p: the 16 bit unit at k_in, high byte first *)
+ if p >= 0xd800 & p < 0xe000 then begin
+ (* p lies in the surrogate range; a complete pair takes 4 bytes. *)
+ if k_in + 3 < l_in then begin
+ if p <= 0xdbff then begin
+ let q = (Char.code s_in.[p_in + k_in + 3 ]) lor
+ ((Char.code s_in.[p_in + k_in + 2]) lsl 8) in
+ if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+ let eff_p = (p lsl 10) + q + surrogate_offset in
+ let n = write eff_p k_out c_out in
+ if n < 0 then
+ k_in, k_out, `Enc_utf16_be
+ else
+ scan (k_in + 4) (k_out + n) (c_out + 1)
+ end
+ else
+ (* Malformed pair (p is a low surrogate in leading position): *)
+ raise Malformed_code;
+ end
+ else
+ (* Incomplete pair. NOTE(review): a lone low surrogate in the last 2 bytes also ends up here: *)
+ k_in, k_out, `Enc_utf16_be
+ end
+
+ else
+ if p = 0xfffe then
+ (* Little endian byte order mark: It is illegal here *)
+ raise Malformed_code
+ else begin
+ (* A regular code point *)
+ let n = write p k_out c_out in
+ if n < 0 then
+ k_in, k_out, `Enc_utf16_be
+ else
+ scan (k_in + 2) (k_out + n) (c_out + 1)
+ end
+
+ end
+ else
+ (* Incomplete character (fewer than 2 bytes left): *)
+ k_in, k_out, `Enc_utf16_be
+ in
+ scan k_in_0 0 0
+;;
+
+
+let read_utf16 write s_in p_in l_in =
+  (* The input must start with a byte order mark; it decides which of
+   * the two readers processes the rest, starting behind the mark.
+   * With fewer than 2 input bytes nothing is consumed. *)
+  if l_in < 2 then
+    (0, 0, `Enc_utf16)
+  else begin
+    let b0 = s_in.[p_in + 0] in
+    let b1 = s_in.[p_in + 1] in
+    match b0, b1 with
+        ('\254', '\255') ->
+          (* 0xfeff as big endian *)
+          read_utf16_be 2 write s_in p_in l_in
+      | ('\255', '\254') ->
+          (* 0xfeff as little endian *)
+          read_utf16_le 2 write s_in p_in l_in
+      | _ ->
+          (* byte order mark missing *)
+          raise Malformed_code
+  end
+;;
+
+
+let write_iso88591 s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores code point p at offset k_out of the output window (l_out bytes
+   * of s_out starting at p_out) and returns the number of bytes stored.
+   * Code points above 255 are replaced by the string w p. The result is
+   * -1 if the window is full, if max_chars characters have already been
+   * written, or if the replacement string does not fit.
+   *)
+  if k_out >= l_out || c_out >= max_chars then
+    -1                      (* End-of-buffer indicator *)
+  else if p <= 255 then begin
+    s_out.[p_out + k_out] <- Char.chr p;
+    1
+  end
+  else begin
+    let replacement = w p in
+    let n = String.length replacement in
+    if k_out + n > l_out then
+      -1                    (* Not enough space: stop this round of recoding *)
+    else
+      (String.blit replacement 0 s_out (p_out + k_out) n; n)
+  end
+;;
+
+
+let write_usascii s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores code point p at offset k_out of the output window (l_out bytes
+   * of s_out starting at p_out) and returns the number of bytes stored.
+   * Code points above 127 are replaced by the string w p. The result is
+   * -1 if the window is full, if max_chars characters have already been
+   * written, or if the replacement string does not fit.
+   *)
+  if k_out >= l_out || c_out >= max_chars then
+    -1                      (* End-of-buffer indicator *)
+  else if p <= 127 then begin
+    s_out.[p_out + k_out] <- Char.chr p;
+    1
+  end
+  else begin
+    let replacement = w p in
+    let n = String.length replacement in
+    if k_out + n > l_out then
+      -1                    (* Not enough space: stop this round of recoding *)
+    else
+      (String.blit replacement 0 s_out (p_out + k_out) n; n)
+  end
+;;
+
+
+let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out =
+  (* Looks up the local code of code point p in from_unicode (indexed by
+   * the low 8 bits of p) and stores it at offset k_out of the output
+   * window. Code points without a local code are replaced by the string
+   * w p. Returns the number of bytes stored, or -1 if the window is full,
+   * max_chars is reached, or the replacement string does not fit. *)
+  if k_out >= l_out || c_out >= max_chars then
+    -1                      (* End-of-buffer indicator *)
+  else begin
+    let local =
+      match Array.unsafe_get from_unicode (p land 255) with
+          Netmappings.U_nil -> -1
+        | Netmappings.U_single (uni, loc) -> if uni = p then loc else -1
+        | Netmappings.U_list pairs ->
+            (try List.assoc p pairs with Not_found -> -1)
+    in
+    if local >= 0 then begin
+      s_out.[p_out + k_out] <- Char.chr local;
+      1
+    end
+    else begin
+      let replacement = w p in
+      let n = String.length replacement in
+      if k_out + n > l_out then
+        -1                  (* Not enough space: stop this round of recoding *)
+      else
+        (String.blit replacement 0 s_out (p_out + k_out) n; n)
+    end
+  end
+;;
+
+
+let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores code point p as UTF-8 at offset k_out of the output window
+   * and returns the number of bytes stored (1 to 4), or -1 if the bytes
+   * do not fit or max_chars is reached. With is_java, code point 0 takes
+   * the two-byte form. w is not used: nothing is ever substituted. *)
+  let room n = k_out + n - 1 < l_out && c_out < max_chars in
+  let put i v = s_out.[p_out + k_out + i] <- Char.chr v in
+  let cont shift = 0x80 lor ((p lsr shift) land 0x3f) in
+  if p <= 127 && (not is_java || p <> 0) then begin
+    if room 1 then (put 0 p; 1) else -1
+  end
+  else if p <= 0x7ff then begin
+    if room 2 then begin
+      put 0 (0xc0 lor (p lsr 6));
+      put 1 (cont 0);
+      2
+    end else -1
+  end
+  else if p <= 0xffff then begin
+    (* Refuse writing surrogate pairs, and fffe, ffff *)
+    if (p >= 0xd800 && p < 0xe000) || p >= 0xfffe then
+      failwith "Netconversion.write_utf8";
+    if room 3 then begin
+      put 0 (0xe0 lor (p lsr 12));
+      put 1 (cont 6);
+      put 2 (cont 0);
+      3
+    end else -1
+  end
+  else if p <= 0x10ffff then begin
+    if room 4 then begin
+      put 0 (0xf0 lor (p lsr 18));
+      put 1 (cont 12);
+      put 2 (cont 6);
+      put 3 (cont 0);
+      4
+    end else -1
+  end
+  else
+    (* Higher code points are not possible in XML: *)
+    failwith "Netconversion.write_utf8"
+;;
+
+
+let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
+  (* Little-endian UTF-16 writer: returns the bytes stored (2 or 4), or -1 if they do not fit or max_chars is reached. *)
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_le";
+    (* Surrogate pair: the 20 bits of p - 0x10000 are split 10/10 *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = ((p - 0x10000) lsr 10) + 0xd800 in
+      let low = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out    ] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out    ] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
+      2
+    end
+    else -1
+  end
+;;
+
+
+(* Stores code point p as UTF-16BE in s_out from index p_out + k_out. Returns
+ * the byte count (2 or 4), or -1 if the bytes do not fit below index l_out or
+ * if c_out >= max_chars. Fails for 0xfffe, 0xffff and p > 0x10ffff. *)
+let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
+  let base = p_out + k_out in
+  if p >= 0xfffe then begin
+    if p <= 0xffff || p > 0x10ffff then failwith "Netconversion.write_utf16_be";
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      (* Surrogate pair: it encodes the offset p - 0x10000, not p itself *)
+      let high = ((p - 0x10000) lsr 10) + 0xd800 in
+      let low = (p land 0x3ff) + 0xdc00 in
+      s_out.[base + 1] <- Char.chr (high land 0xff);
+      s_out.[base] <- Char.chr (high lsr 8);
+      s_out.[base + 3] <- Char.chr (low land 0xff);
+      s_out.[base + 2] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else if k_out + 1 < l_out && c_out < max_chars then begin
+    s_out.[base + 1] <- Char.chr (p land 0xff);
+    s_out.[base] <- Char.chr (p lsr 8);
+    2
+  end else -1
+;;
+
+
+(* Converts the in_len bytes of in_buf starting at in_pos from in_enc to
+ * out_enc, storing the result in out_buf (at most out_len bytes from out_pos,
+ * at most max_chars characters). The reader chosen for in_enc drives the
+ * writer chosen for out_enc; recode_string takes the result apart as
+ * (bytes read, bytes written, input encoding). Raises Invalid_argument for a
+ * bad range, Failure for `Enc_utf16 output or a missing mapping table. *)
+let recode ~in_enc
+           ~in_buf
+           ~in_pos
+           ~in_len
+           ~out_enc
+           ~out_buf
+           ~out_pos
+           ~out_len
+           ~max_chars
+           ~subst =
+  if (in_pos < 0 || in_len < 0 || in_pos + in_len > String.length in_buf ||
+      out_pos < 0 || out_len < 0 || out_pos + out_len > String.length out_buf)
+  then invalid_arg "Netconversion.recode";
+  (* Forces a lazy mapping table under the Netmappings lock. The lock is
+   * released on every path, also when forcing the table raises.
+   *)
+  let force_locked table =
+    Netmappings.lock();
+    let forced =
+      (try Lazy.force table
+       with any -> Netmappings.unlock(); raise any) in
+    Netmappings.unlock();
+    forced
+  in
+  let unsupported enc =
+    failwith("Support for the encoding `" ^
+             string_of_encoding enc ^
+             "' has not been compiled into Netstring")
+  in
+  let reader =
+    match in_enc with
+      `Enc_iso88591 -> read_iso88591
+    | `Enc_usascii -> read_usascii
+    | `Enc_utf8 -> read_utf8 false
+    | `Enc_java -> read_utf8 true
+    | `Enc_utf16 -> read_utf16
+    | `Enc_utf16_le -> read_utf16_le 0
+    | `Enc_utf16_be -> read_utf16_be 0
+    | _ ->
+        (try
+           let table = Hashtbl.find Netmappings.to_unicode in_enc in
+           read_8bit (force_locked table) in_enc
+         with Not_found -> unsupported in_enc)
+  in
+  let writer =
+    match out_enc with
+      `Enc_iso88591 -> write_iso88591 out_buf out_pos out_len max_chars subst
+    | `Enc_usascii -> write_usascii out_buf out_pos out_len max_chars subst
+    | `Enc_utf8 -> write_utf8 false out_buf out_pos out_len max_chars subst
+    | `Enc_java -> write_utf8 true out_buf out_pos out_len max_chars subst
+    | `Enc_utf16 -> failwith "Netconversion.recode"
+    | `Enc_utf16_le -> write_utf16_le out_buf out_pos out_len max_chars subst
+    | `Enc_utf16_be -> write_utf16_be out_buf out_pos out_len max_chars subst
+    | _ ->
+        (try
+           let table = Hashtbl.find Netmappings.from_unicode out_enc in
+           write_8bit (force_locked table) out_buf out_pos out_len max_chars subst
+         with Not_found -> unsupported out_enc)
+  in
+  reader writer in_buf in_pos in_len
+;;
+
+
+(* makechar enc p: the string that represents code point p in encoding enc.
+ * Raises Not_found if p exceeds 255 for `Enc_iso88591 or 127 for
+ * `Enc_usascii; the substitution callback handed to the writers raises
+ * Not_found as well. Fails for `Enc_utf16 and for encodings that have no
+ * table in Netmappings.from_unicode.
+ *)
+let makechar enc p =
+  let no_subst _ = raise Not_found in
+  (* Runs 'writer' on a fresh 4-byte buffer and returns the bytes it stored *)
+  let via writer =
+    let s = String.create 4 in
+    let n = writer s 0 4 1 no_subst p 0 0 in
+    String.sub s 0 n
+  in
+  match enc with
+    `Enc_iso88591 ->
+      if p > 255 then raise Not_found;
+      String.make 1 (Char.chr p)
+  | `Enc_usascii ->
+      if p > 127 then raise Not_found;
+      String.make 1 (Char.chr p)
+  | `Enc_utf8 -> via (write_utf8 false)
+  | `Enc_java -> via (write_utf8 true)
+  | `Enc_utf16_le -> via write_utf16_le
+  | `Enc_utf16_be -> via write_utf16_be
+  | `Enc_utf16 -> failwith "Netconversion.makechar"
+  | _ ->
+      let s = String.create 1 in
+      let from_unicode' =
+        try Hashtbl.find Netmappings.from_unicode enc
+        with Not_found ->
+            failwith("Support for the encoding `" ^
+                     string_of_encoding enc ^
+                     "' has not been compiled into Netstring")
+      in
+      (* The lock is released even when forcing the table raises *)
+      Netmappings.lock();
+      let from_unicode =
+        (try Lazy.force from_unicode'
+         with any -> Netmappings.unlock(); raise any) in
+      Netmappings.unlock();
+      (* The byte count is not needed: s is exactly one byte long *)
+      ignore (write_8bit from_unicode s 0 1 1 no_subst p 0 0);
+      s
+;;
+
+
+(* Recodes all of s from in_enc to out_enc by calling recode repeatedly on a
+ * 1024-byte buffer and concatenating the pieces. subst defaults to a function
+ * raising Not_found. Raises Malformed_code if input is left over but recode
+ * can neither consume nor produce anything (e.g. a truncated multi-byte
+ * character at the end of s); without this check the loop would never end.
+ *)
+let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
+  let length = String.length s in
+  let size = 1024 in
+  let out_buf = String.create size in
+  (* 'k' bytes of 's' have already been processed; 'chunks' holds the recoded
+   * pieces, most recent first. *)
+  let rec recode_loop k chunks in_enc =
+    let in_done, out_done, in_enc' =
+      recode ~in_enc:in_enc ~in_buf:s ~in_pos:k ~in_len:(length - k)
+        ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0 ~out_len:size
+        ~max_chars:size ~subst:subst in
+    let k' = k + in_done in
+    let chunks' = String.sub out_buf 0 out_done :: chunks in
+    if k' >= length then
+      String.concat "" (List.rev chunks')
+    else if in_done = 0 && out_done = 0 then
+      (* No progress although input remains *)
+      raise Malformed_code
+    else
+      recode_loop k' chunks' in_enc'
+  in
+  recode_loop 0 [] in_enc
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/29 00:46:41 gerd
+ * New type for the Unicode to 8 bit translation table.
+ * The Netmappings tables are now Lazy.t.
+ *
+ * Revision 1.1 2000/08/13 00:02:57 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.ml):
+ *
+ * Revision 1.5 2000/07/27 00:41:14 gerd
+ * new 8 bit codes
+ *
+ * Revision 1.4 2000/07/04 22:11:41 gerd
+ * Implemented the enhancements and extensions of
+ * rev. 1.4 of pxp_encoding.mli.
+ *
+ * Revision 1.3 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netconversion.mli b/helm/DEVEL/pxp/netstring/netconversion.mli
new file mode 100644
index 000000000..5e3e4b4e1
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netconversion.mli
@@ -0,0 +1,241 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code
+
+(* Encodings:
+ * - With the exception of UTF-8 and UTF-16, only single-byte character sets
+ * are supported.
+ * - I took the mappings from www.unicode.org, and the standard names of
+ * the character sets from IANA. Obviously, many character sets are missing
+ * that can be supported; especially ISO646 character sets, many EBCDIC
+ * code pages.
+ * - Because of the copyright statement from Unicode, I cannot put the
+ * source tables that describe the mappings into the distribution. They
+ * are publicly available from www.unicode.org.
+ * - Because of this, it is difficult for you to extend the list of character
+ * sets; you need the source tables I am not allowed to distribute.
+ * These tables have a very simple format: Every line describes a pair
+ * of code points; the left code (<= 0xff) is the code in the character
+ * set, the right code (<= 0xffff) is the Unicode equivalent.
+ * For an example, see
+ * http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
+ * You can send me such files, and I will integrate them into the
+ * distribution (if possible).
+ * - I really do not know very much about the character sets used in
+ * East Asia. If you need them, please write the necessary conversion
+ * functions and send them to me.
+ *
+ * KNOWN PROBLEMS:
+ * - The following charsets do not have a bijective mapping to Unicode:
+ * adobe_standard_encoding, adobe_symbol_encoding,
+ * adobe_zapf_dingbats_encoding, cp1002 (0xFEBE). The current implementation
+ * simply removes one of the conflicting code point pairs - this might
+ * not be what you want.
+ *)
+
+type encoding =
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_java (* The variant of UTF-8 used by Java *)
+ | `Enc_utf16 (* UTF-16 with unspecified endianess (restricted usage) *)
+ | `Enc_utf16_le (* UTF-16 little endian *)
+ | `Enc_utf16_be (* UTF-16 big endian *)
+ | `Enc_usascii (* US-ASCII (only 7 bit) *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ | `Enc_iso88592 (* ISO-8859-2 *)
+ | `Enc_iso88593 (* ISO-8859-3 *)
+ | `Enc_iso88594 (* ISO-8859-4 *)
+ | `Enc_iso88595 (* ISO-8859-5 *)
+ | `Enc_iso88596 (* ISO-8859-6 *)
+ | `Enc_iso88597 (* ISO-8859-7 *)
+ | `Enc_iso88598 (* ISO-8859-8 *)
+ | `Enc_iso88599 (* ISO-8859-9 *)
+ | `Enc_iso885910 (* ISO-8859-10 *)
+ | `Enc_iso885913 (* ISO-8859-13 *)
+ | `Enc_iso885914 (* ISO-8859-14 *)
+ | `Enc_iso885915 (* ISO-8859-15 *)
+ | `Enc_koi8r (* KOI8-R *)
+ | `Enc_jis0201 (* JIS-0201 *)
+ (* Microsoft: *)
+ | `Enc_windows1250 (* WINDOWS-1250 *)
+ | `Enc_windows1251 (* WINDOWS-1251 *)
+ | `Enc_windows1252 (* WINDOWS-1252 *)
+ | `Enc_windows1253 (* WINDOWS-1253 *)
+ | `Enc_windows1254 (* WINDOWS-1254 *)
+ | `Enc_windows1255 (* WINDOWS-1255 *)
+ | `Enc_windows1256 (* WINDOWS-1256 *)
+ | `Enc_windows1257 (* WINDOWS-1257 *)
+ | `Enc_windows1258 (* WINDOWS-1258 *)
+ (* IBM, ASCII-based: *)
+ | `Enc_cp437
+ | `Enc_cp737
+ | `Enc_cp775
+ | `Enc_cp850
+ | `Enc_cp852
+ | `Enc_cp855
+ | `Enc_cp856
+ | `Enc_cp857
+ | `Enc_cp860
+ | `Enc_cp861
+ | `Enc_cp862
+ | `Enc_cp863
+ | `Enc_cp864
+ | `Enc_cp865
+ | `Enc_cp866
+ | `Enc_cp869
+ | `Enc_cp874
+ | `Enc_cp1006
+ (* IBM, EBCDIC-based: *)
+ | `Enc_cp037
+ | `Enc_cp424
+ | `Enc_cp500
+ | `Enc_cp875
+ | `Enc_cp1026
+ (* Adobe: *)
+ | `Enc_adobe_standard_encoding
+ | `Enc_adobe_symbol_encoding
+ | `Enc_adobe_zapf_dingbats_encoding
+ (* Apple: *)
+ | `Enc_macroman
+
+ ]
+
+
+val encoding_of_string : string -> encoding;;
+ (* Returns the encoding denoted by the given name. Fails if the
+ * encoding is unknown.
+ * E.g. encoding_of_string "iso-8859-1" = `Enc_iso88591
+ *)
+
+val string_of_encoding : encoding -> string;;
+ (* Returns the name of the encoding. *)
+
+
+val makechar : encoding -> int -> string
+ (* makechar enc i:
+ * Creates the string representing the code point i in encoding enc.
+ * Raises Not_found if the character is legal but cannot be represented
+ * in enc.
+ *
+ * Possible encodings: everything but `Enc_utf16.
+ *)
+
+val recode : in_enc:encoding ->
+ in_buf:string ->
+ in_pos:int ->
+ in_len:int ->
+ out_enc:encoding ->
+ out_buf:string ->
+ out_pos:int ->
+ out_len:int ->
+ max_chars:int ->
+ subst:(int -> string) -> (int * int * encoding)
+ (*
+ * let (in_n, out_n, in_enc') =
+ * recode in_enc in_buf in_pos in_len out_enc out_buf out_pos out_len max_chars
+ * subst:
+ * Converts the character sequence contained in the at most in_len bytes
+ * of in_buf starting at position in_pos, and writes the result
+ * into at most out_len bytes of out_buf starting at out_pos.
+ * At most max_chars are written into out_buf.
+ * The characters in in_buf are assumed to be encoded as in_enc, and the
+ * characters in out_buf will be encoded as out_enc.
+ * If there is a code point which cannot be represented in out_enc,
+ * the function subst is called with the code point as argument, and the
+ * resulting string (which must already be encoded as out_enc) is
+ * inserted instead.
+ * Note: It is possible that subst is called several times for the same
+ * character.
+ * Return value: out_n is the actual number of bytes written into out_buf.
+ * in_n is the actual number of bytes that have been converted from
+ * in_buf; in_n may be smaller than in_len because of incomplete
+ * multi-byte characters, or because the output buffer has less space
+ * for characters than the input buffer, or because of a change
+ * of the encoding variant.
+ * If there is at least one complete character in in_buf, and at least
+ * space for one complete character in out_buf, and max_chars >= 1, it is
+ * guaranteed that in_n > 0 or out_n > 0.
+ * in_enc' is normally identical to in_enc. However, there are cases
+ * in which the encoding can be refined when looking at the byte
+ * sequence; for example whether a little endian or big endian variant
+ * of the encoding is used. in_enc' is the variant of in_enc that was
+ * used for the last character that has been converted.
+ *
+ * NOTES:
+ *
+ * Supported range of code points: 0 to 0xd7ff, 0xe000 to 0xfffd,
+ * 0x10000 to 0x10ffff.
+ *
+ * Enc_utf8: Malformed UTF-8 byte sequences are always rejected. This
+ * is also true for the sequence 0xc0 0x80 which is used by some software
+ * (Java) as paraphrase for the code point 0.
+ *
+ * Enc_utf16: When reading from a string encoded as Enc_utf16, a byte
+ * order mark is expected at the beginning. The detected variant
+ * (Enc_utf16_le or Enc_utf16_be) is returned. The byte order mark is
+ * not included into the output string. - It is not possible to
+ * write as Enc_utf16.
+ *
+ * Enc_utf16_le, Enc_utf16_be: When reading from such a string, the
+ * code point 0xfeff is returned as it is; it is a "zero-width
+ * non-breaking space". The code point 0xfffe is rejected.
+ *
+ * Surrogate pairs: These are recognized (or written) only for a
+ * UTF-16 encoding; and rejected for any other encoding.
+ *
+ * Rejected byte sequences cause the exception Malformed_code.
+ *)
+
+val recode_string : in_enc:encoding ->
+ out_enc:encoding ->
+ ?subst:(int -> string) ->
+ string ->
+ string
+ (* Recodes a complete string from in_enc to out_enc, and returns it.
+ * The function subst is invoked for code points of in_enc that cannot
+ * be represented in out_enc, and the result of the function invocation
+ * is substituted.
+ * If subst is missing, Not_found is raised in this case.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/08/13 00:02:57 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.mli):
+ *
+ * Revision 1.4 2000/07/04 22:05:58 gerd
+ * Enhanced version of 'recode'. Labeled arguments.
+ * New function 'recode_string'.
+ *
+ * Revision 1.3 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netencoding.ml b/helm/DEVEL/pxp/netstring/netencoding.ml
new file mode 100644
index 000000000..e87c4c397
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netencoding.ml
@@ -0,0 +1,903 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+module Str = Netstring_str;;
+
+module Base64 = struct
+ let b64_pattern plus slash = (* 64-entry alphabet; plus and slash fill entries 62 and 63 *)
+ [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
+ 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
+ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
+ 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
+ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;
+
+
+ let rfc_pattern = b64_pattern '+' '/';; (* alphabet ending in '+' and '/' *)
+ let url_pattern = b64_pattern '-' '/';; (* URL variant: '-' takes the place of '+' *)
+
+ let encode_with_options b64 equal s pos len linelen crlf =
+ (* encode using "base64".
+ * 'b64': The encoding table, created by b64_pattern.
+ * 'equal': The character that should be used instead of '=' in the original
+ * encoding scheme. Pass '=' to get the original encoding scheme.
+ * s, pos, len, linelen: See the interface description of encode_substring.
+ * 'crlf': line ends are CR LF if true, a single LF otherwise. *)
+ assert (Array.length b64 = 64);
+ if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
+ invalid_arg "Netencoding.Base64.encode_with_options";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.Base64.encode_with_options";
+ (* Lines hold whole quartets only: round linelen down to a multiple of 4 *)
+ let linelen =
+ (linelen/4) * 4 in
+
+ let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
+ (* l_t: length of the result without additional line endings *)
+
+ let l_t' =
+ if linelen < 4 then
+ l_t
+ else
+ if l_t = 0 then 0 else
+ let n_lines = ((l_t - 1) / linelen) + 1 in
+ l_t + n_lines * (if crlf then 2 else 1)
+ in
+ (* l_t': length of the result with CRLF or LF characters *)
+
+ let t = String.make l_t' equal in
+ let j = ref 0 in
+ let q = ref 0 in
+ for k = 0 to len / 3 - 1 do
+ let p = pos + 3*k in
+ (* p >= pos >= 0: this is evident
+ * p+2 < pos+len <= String.length s:
+ * Because k <= len/3-1
+ * 3*k <= 3*(len/3-1) = len - 3
+ * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
+ * So it is proved that the following unsafe string accesses always
+ * work.
+ *)
+ let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor
+ (Char.code (String.unsafe_get s (p+1)) lsl 8) lor
+ (Char.code (String.unsafe_get s (p+2))) in
+ (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
+ assert(!j + 3 < l_t');
+ String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18));
+ String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
+ String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63));
+ String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63));
+ j := !j + 4;
+ if linelen > 3 then begin
+ q := !q + 4;
+ if !q + 4 > linelen then begin
+ (* The next 4 characters won't fit on the current line. So insert
+ * a line ending.
+ *)
+ if crlf then begin
+ t.[ !j ] <- '\013';
+ t.[ !j+1 ] <- '\010';
+ j := !j + 2;
+ end
+ else begin
+ t.[ !j ] <- '\010';
+ incr j
+ end;
+ q := 0;
+ end;
+ end;
+ done;
+ (* padding if needed: *)
+ let m = len mod 3 in
+ begin
+ match m with
+ 0 -> ()
+ | 1 ->
+ let bits = Char.code (s.[pos + len - 1]) in
+ t.[ !j ] <- b64.( bits lsr 2);
+ t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
+ j := !j + 4;
+ q := !q + 4;
+ | 2 ->
+ let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
+ (Char.code (s.[pos + len - 1])) in
+ t.[ !j ] <- b64.( bits lsr 10);
+ t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f);
+ t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f);
+ j := !j + 4;
+ q := !q + 4;
+ | _ -> assert false
+ end;
+
+ (* If required, add another line end: *)
+
+ if linelen > 3 & !q > 0 then begin
+ if crlf then begin
+ t.[ !j ] <- '\013';
+ t.[ !j+1 ] <- '\010';
+ j := !j + 2;
+ end
+ else begin
+ t.[ !j ] <- '\010';
+ incr j
+ end;
+ end;
+
+ t ;;
+
+
+
+ let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = (* rfc_pattern table, '=' padding *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ encode_with_options rfc_pattern '=' s pos n linelength crlf;;
+
+
+ let encode_substring s ~pos ~len ~linelength ~crlf = (* as encode, with every argument explicit *)
+ encode_with_options rfc_pattern '=' s pos len linelength crlf;;
+
+
+ let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = (* url_pattern table, '.' padding *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ encode_with_options url_pattern '.' s pos n linelength crlf;;
+
+
+ let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
+ (* Decodes the base64 text t.[pos .. pos+len-1] and returns the bytes. With
+ * p_url, '-' is accepted for '+' and '.' for '='; with p_spaces, blanks, tabs,
+ * CR and LF are skipped. Raises Invalid_argument on malformed input. *)
+ if len < 0 or pos < 0 or pos > String.length t
+ or pos + len > String.length t then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ (* Compute the number of effective characters l_t in 't';
+ * pad_chars: number of '=' characters at the end of the string.
+ *)
+ let l_t, pad_chars =
+ if p_spaces then begin
+ (* Count all non-whitespace characters: *)
+ let c = ref 0 in
+ let p = ref 0 in
+ for i = pos to pos + len - 1 do
+ match String.unsafe_get t i with
+ (' '|'\t'|'\r'|'\n') -> ()
+ | ('='|'.') as ch ->
+ if ch = '.' & not p_url then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ incr c;
+ incr p;
+ if !p > 2 then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ for j = i+1 to pos + len - 1 do
+ match String.unsafe_get t j with
+ (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
+ | _ ->
+ (* Only another '=' or spaces allowed *)
+ invalid_arg "Netencoding.Base64.decode_substring";
+ done
+ | _ -> incr c
+ done;
+ if !c mod 4 <> 0 then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ !c, !p
+ end
+ else
+ len,
+ ( if len mod 4 <> 0 then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ if len > 0 then ( (* the padding sits at the end of the range, i.e. relative to pos *)
+ if String.sub t (pos + len - 2) 2 = "==" or
+ (p_url & String.sub t (pos + len - 2) 2 = "..") then 2
+ else
+ if String.sub t (pos + len - 1) 1 = "=" or
+ (p_url & String.sub t (pos + len - 1) 1 = ".") then 1
+ else
+ 0
+ )
+ else 0
+ )
+ in
+ let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *)
+ let s = String.create l_s in
+
+ let decode_char c =
+ match c with
+ 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *)
+ | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *)
+ | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *)
+ | '+' -> 62
+ | '-' -> if not p_url then
+ invalid_arg "Netencoding.Base64.decode_substring";
+ 62
+ | '/' -> 63
+ | _ -> invalid_arg "Netencoding.Base64.decode_substring";
+ in
+
+ (* Decode all but the last quartet: *)
+
+ let cursor = ref pos in
+ let rec next_char() =
+ match t.[ !cursor ] with
+ (' '|'\t'|'\r'|'\n') ->
+ if p_spaces then (incr cursor; next_char())
+ else invalid_arg "Netencoding.Base64.decode_substring"
+ | c ->
+ incr cursor; c
+ in
+
+ if p_spaces then begin
+ for k = 0 to l_t / 4 - 2 do
+ let q = 3*k in
+ let c0 = next_char() in
+ let c1 = next_char() in
+ let c2 = next_char() in
+ let c3 = next_char() in
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let n3 = decode_char c3 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+ String.unsafe_set s q (Char.chr x0);
+ String.unsafe_set s (q+1) (Char.chr x1);
+ String.unsafe_set s (q+2) (Char.chr x2);
+ done;
+ end
+ else begin
+ (* Much faster: *)
+ for k = 0 to l_t / 4 - 2 do
+ let p = pos + 4*k in
+ let q = 3*k in
+ let c0 = String.unsafe_get t p in
+ let c1 = String.unsafe_get t (p + 1) in
+ let c2 = String.unsafe_get t (p + 2) in
+ let c3 = String.unsafe_get t (p + 3) in
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let n3 = decode_char c3 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+ String.unsafe_set s q (Char.chr x0);
+ String.unsafe_set s (q+1) (Char.chr x1);
+ String.unsafe_set s (q+2) (Char.chr x2);
+ done;
+ cursor := pos + l_t - 4;
+ end;
+
+ (* Decode the last quartet: *)
+
+ if l_t > 0 then begin
+ let q = 3*(l_t / 4 - 1) in
+ let c0 = next_char() in
+ let c1 = next_char() in
+ let c2 = next_char() in
+ let c3 = next_char() in
+
+ if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ s.[ q ] <- Char.chr x0;
+ end
+ else
+ if (c3 = '=') or (p_url & c3 = '.') then begin
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ s.[ q ] <- Char.chr x0;
+ s.[ q+1 ] <- Char.chr x1;
+ end
+ else begin
+ let n0 = decode_char c0 in
+ let n1 = decode_char c1 in
+ let n2 = decode_char c2 in
+ let n3 = decode_char c3 in
+ let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+ let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+ let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+ s.[ q ] <- Char.chr x0;
+ s.[ q+1 ] <- Char.chr x1;
+ s.[ q+2 ] <- Char.chr x2;
+ end
+
+ end;
+
+ s ;;
+
+
+
+ let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s = (* len defaults to the rest of s *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ decode_substring s pos n url_variant accept_spaces;;
+
+ let decode_ignore_spaces s = (* whole string, url_variant and accept_spaces both true *)
+ decode_substring s 0 (String.length s) true true;;
+
+
+end
+
+
+
+module QuotedPrintable = struct
+
+ let encode_substring s ~pos ~len =
+ (* Quoted-printable form of s.[pos .. pos+len-1]: CR and LF are copied, the
+ * listed bytes become =XX, a space only before CR/LF or as the last byte. *)
+ if len < 0 or pos < 0 or pos > String.length s
+ or pos + len > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ ('\r'|'\n') ->
+ count (n+1) (i+1)
+ | ('\000'..'\031'|'\127'..'\255'|
+ '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
+ count (n+3) (i+1)
+ | ' ' ->
+ (* Protect spaces only if they occur at the end of a line *)
+ if i+1 < len then
+ match s.[pos+i+1] with
+ ('\r'|'\n') ->
+ count (n+3) (i+1)
+ | _ ->
+ count (n+1) (i+1)
+ else
+ count (n+3) (i+1)
+ | _ ->
+ count (n+1) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+
+ let hexdigit =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
+
+ let k = ref 0 in
+
+ let add_quoted c =
+ t.[ !k ] <- '=';
+ t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+ t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
+ in
+
+ for i = 0 to len - 1 do
+ match String.unsafe_get s (pos+i) with (* same offset as in 'count' *)
+ ('\r'|'\n') as c ->
+ String.unsafe_set t !k c;
+ incr k
+ | ('\000'..'\031'|'\127'..'\255'|
+ '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
+ add_quoted c;
+ k := !k + 3
+ | ' ' ->
+ (* Protect spaces only if they occur at the end of a line *)
+ if i+1 < len then
+ match s.[pos+i+1] with
+ ('\r'|'\n') ->
+ add_quoted ' ';
+ k := !k + 3;
+ | _ ->
+ String.unsafe_set t !k ' ';
+ incr k
+ else begin
+ add_quoted ' ';
+ k := !k + 3;
+ end
+ | c ->
+ String.unsafe_set t !k c;
+ incr k
+ done;
+
+ t ;;
+
+
+ let encode ?(pos=0) ?len s = (* len defaults to the rest of s after pos *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ encode_substring s pos n;;
+
+
+
+ let decode_substring s ~pos ~len =
+ (* Decodes quoted-printable s.[pos .. pos+len-1]; '=' before CR, CRLF or LF and a final '=' are dropped *)
+ if len < 0 or pos < 0 or pos > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+
+ let decode_hex c =
+ match c with
+ '0'..'9' -> Char.code c - 48
+ | 'A'..'F' -> Char.code c - 55
+ | 'a'..'f' -> Char.code c - 87
+ | _ ->
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+ in
+ (* First pass: check the =XX sequences and count the decoded bytes *)
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ '=' ->
+ if i+1 = len then
+ (* A '=' at EOF is ignored *)
+ count n (i+1)
+ else
+ if i+1 < len then
+ match s.[pos+i+1] with
+ '\r' ->
+ (* Official soft break *)
+ if i+2 < len & s.[pos+i+2] = '\n' then
+ count n (i+3)
+ else
+ count n (i+2)
+ | '\n' ->
+ (* Unofficial soft break *)
+ count n (i+2)
+ | _ ->
+ if i+2 >= len then
+ invalid_arg
+ "Netencoding.QuotedPrintable.decode_substring";
+ let _ = decode_hex s.[pos+i+1] in
+ let _ = decode_hex s.[pos+i+2] in
+ count (n+1) (i+3)
+ else
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring"
+ | _ ->
+ count (n+1) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+ let k = ref pos in
+ let e = pos + len in
+ let i = ref 0 in
+ (* Second pass: fill t; k walks the input, i the output, until l bytes are written *)
+ while !i < l do
+ match String.unsafe_get s !k with
+ '=' ->
+ if !k+1 = e then
+ (* A '=' at EOF is ignored *)
+ ()
+ else
+ if !k+1 < e then
+ match s.[!k+1] with
+ '\r' ->
+ (* Official soft break *)
+ if !k+2 < e & s.[!k+2] = '\n' then
+ k := !k + 3
+ else
+ k := !k + 2
+ | '\n' ->
+ (* Unofficial soft break *)
+ k := !k + 2
+ | _ ->
+ if !k+2 >= e then
+ invalid_arg
+ "Netencoding.QuotedPrintable.decode_substring";
+ let x1 = decode_hex s.[!k+1] in
+ let x2 = decode_hex s.[!k+2] in
+ t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
+ k := !k + 3;
+ incr i
+ else
+ invalid_arg "Netencoding.QuotedPrintable.decode_substring"
+ | c ->
+ String.unsafe_set t !i c;
+ incr k;
+ incr i
+ done;
+
+ t ;;
+
+
+ let decode ?(pos=0) ?len s = (* len defaults to the rest of s after pos *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ decode_substring s pos n;;
+
+end
+
+
+module Q = struct
+
+ let encode_substring s ~pos ~len =
+ (* Q encoding of s.[pos .. pos+len-1]: letters and digits are copied, every
+ * other byte (including space) becomes =XX with two upper-case hex digits. *)
+ if len < 0 or pos < 0 or pos > String.length s
+ or pos + len > String.length s then
+ invalid_arg "Netencoding.Q.encode_substring";
+
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ | ('A'..'Z'|'a'..'z'|'0'..'9') ->
+ count (n+1) (i+1)
+ | _ ->
+ count (n+3) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+
+ let hexdigit =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
+
+ let k = ref 0 in
+
+ let add_quoted c =
+ t.[ !k ] <- '=';
+ t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+ t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
+ in
+
+ for i = 0 to len - 1 do
+ match String.unsafe_get s (pos+i) with (* same offset as in 'count' *)
+ | ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
+ String.unsafe_set t !k c;
+ incr k
+ | c ->
+ add_quoted c;
+ k := !k + 3
+ done;
+
+ t ;;
+
+
+ let encode ?(pos=0) ?len s = (* len defaults to the rest of s after pos *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ encode_substring s pos n;;
+
+
+
+ let decode_substring s ~pos ~len =
+ (* Decodes Q-encoded s.[pos .. pos+len-1]: =XX becomes the byte XX, '_' a space, other bytes are copied *)
+ if len < 0 or pos < 0 or pos > String.length s then
+ invalid_arg "Netencoding.Q.decode_substring";
+ if pos + len > String.length s then
+ invalid_arg "Netencoding.Q.decode_substring";
+
+ let decode_hex c =
+ match c with
+ '0'..'9' -> Char.code c - 48
+ | 'A'..'F' -> Char.code c - 55
+ | 'a'..'f' -> Char.code c - 87
+ | _ ->
+ invalid_arg "Netencoding.Q.decode_substring";
+ in
+ (* First pass: check the =XX sequences and count the decoded bytes *)
+ let rec count n i =
+ if i < len then
+ match String.unsafe_get s (pos+i) with
+ '=' ->
+ if i+2 >= len then
+ invalid_arg "Netencoding.Q.decode_substring";
+ let _ = decode_hex s.[pos+i+1] in
+ let _ = decode_hex s.[pos+i+2] in
+ count (n+1) (i+3)
+ | _ -> (* including '_' *)
+ count (n+1) (i+1)
+ else
+ n
+ in
+
+ let l = count 0 0 in
+ let t = String.create l in
+ let k = ref pos in
+ let e = pos + len in
+ let i = ref 0 in
+ (* Second pass: fill t; k walks the input, i the output *)
+ while !i < l do
+ match String.unsafe_get s !k with
+ '=' ->
+ if !k+2 >= e then
+ invalid_arg "Netencoding.Q.decode_substring";
+ let x1 = decode_hex s.[!k+1] in
+ let x2 = decode_hex s.[!k+2] in
+ t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
+ k := !k + 3;
+ incr i
+ | '_' ->
+ String.unsafe_set t !i ' ';
+ incr k;
+ incr i
+ | c ->
+ String.unsafe_set t !i c;
+ incr k;
+ incr i
+ done;
+
+ t ;;
+
+
+ let decode ?(pos=0) ?len s = (* len defaults to the rest of s after pos *)
+ let n = match len with Some given -> given | None -> String.length s - pos in
+ decode_substring s pos n ;;
+
+end
+
+
+module Url = struct
+ let hex_digits =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
+
+ let to_hex2 k =
+ (* Two hex digits from hex_digits for bits 7..0 of k; higher bits are ignored *)
+ let low_digit = hex_digits.( k land 15 ) in
+ let digits = String.make 2 low_digit in
+ digits.[0] <- hex_digits.( (k lsr 4) land 15 );
+ digits ;;
+
+
+ let of_hex1 c =
+ (* Value of the hex digit c (either case); raises Not_found for other chars *)
+ let code = Char.code c in
+ if c >= '0' && c <= '9' then code - Char.code '0'
+ else if c >= 'A' && c <= 'F' then code - Char.code 'A' + 10
+ else if c >= 'a' && c <= 'f' then code - Char.code 'a' + 10
+ else raise Not_found ;;
+
+
+
+ let url_encoding_re =
+ Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
+
+ let url_decoding_re =
+ Str.regexp "\\+\\|%..\\|%.\\|%";;
+
+
+ let encode s =
+ (* Replaces each character matched by url_encoding_re: a space becomes "+",
+ * any other character becomes "%" followed by its two-digit hex code.
+ *)
+ let escape r _ =
+ let found = Str.matched_string r s in
+ if found = " " then "+"
+ else "%" ^ to_hex2 (Char.code found.[0])
+ in
+ Str.global_substitute
+ url_encoding_re escape s ;;
+
+
+ let decode s =
+ let l = String.length s in
+ Str.global_substitute
+ url_decoding_re
+ (fun r _ ->
+ match Str.matched_string r s with
+ | "+" -> " "
+ | _ ->
+ let i = Str.match_beginning r in
+ (* Assertion: s.[i] = '%' *)
+ if i+2 >= l then failwith "Cgi.decode";
+ let c1 = s.[i+1] in
+ let c2 = s.[i+2] in
+ begin
+ try
+ let k1 = of_hex1 c1 in
+ let k2 = of_hex1 c2 in
+ String.make 1 (Char.chr((k1 lsl 4) lor k2))
+ with
+ Not_found ->
+ failwith "Cgi.decode"
+ end
+ )
+ s ;;
+
+end
+
+
+module Html = struct
+  (* Entity references: "&#<digits>;" (group 2) or "&<letters>;" (group 3). *)
+  let eref_re =
+    Str.regexp
+      "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
+  let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
+
+  let etable =
+    [ "lt", "<";
+      "gt", ">";
+      "amp", "&";
+      "quot", "\"";
+      (* Note: &quot; is new in HTML-4.0, but it has been widely used
+       * much earlier.
+       *)
+      "nbsp", "\160";
+      "iexcl", "\161";
+      "cent", "\162";
+      "pound", "\163";
+      "curren", "\164";
+      "yen", "\165";
+      "brvbar", "\166";
+      "sect", "\167";
+      "uml", "\168";
+      "copy", "\169";
+      "ordf", "\170";
+      "laquo", "\171";
+      "not", "\172";
+      "shy", "\173";
+      "reg", "\174";
+      "macr", "\175";
+      "deg", "\176";
+      "plusmn", "\177";
+      "sup2", "\178";
+      "sup3", "\179";
+      "acute", "\180";
+      "micro", "\181";
+      "para", "\182";
+      "middot", "\183";
+      "cedil", "\184";
+      "sup1", "\185";
+      "ordm", "\186";
+      "raquo", "\187";
+      "frac14", "\188";
+      "frac12", "\189";
+      "frac34", "\190";
+      "iquest", "\191";
+      "Agrave", "\192";
+      "Aacute", "\193";
+      "Acirc", "\194";
+      "Atilde", "\195";
+      "Auml", "\196";
+      "Aring", "\197";
+      "AElig", "\198";
+      "Ccedil", "\199";
+      "Egrave", "\200";
+      "Eacute", "\201";
+      "Ecirc", "\202";
+      "Euml", "\203";
+      "Igrave", "\204";
+      "Iacute", "\205";
+      "Icirc", "\206";
+      "Iuml", "\207";
+      "ETH", "\208";
+      "Ntilde", "\209";
+      "Ograve", "\210";
+      "Oacute", "\211";
+      "Ocirc", "\212";
+      "Otilde", "\213";
+      "Ouml", "\214";
+      "times", "\215";
+      "Oslash", "\216";
+      "Ugrave", "\217";
+      "Uacute", "\218";
+      "Ucirc", "\219";
+      "Uuml", "\220";
+      "Yacute", "\221";
+      "THORN", "\222";
+      "szlig", "\223";
+      "agrave", "\224";
+      "aacute", "\225";
+      "acirc", "\226";
+      "atilde", "\227";
+      "auml", "\228";
+      "aring", "\229";
+      "aelig", "\230";
+      "ccedil", "\231";
+      "egrave", "\232";
+      "eacute", "\233";
+      "ecirc", "\234";
+      "euml", "\235";
+      "igrave", "\236";
+      "iacute", "\237";
+      "icirc", "\238";
+      "iuml", "\239";
+      "eth", "\240";
+      "ntilde", "\241";
+      "ograve", "\242";
+      "oacute", "\243";
+      "ocirc", "\244";
+      "otilde", "\245";
+      "ouml", "\246";
+      "divide", "\247";
+      "oslash", "\248";
+      "ugrave", "\249";
+      "uacute", "\250";
+      "ucirc", "\251";
+      "uuml", "\252";
+      "yacute", "\253";
+      "thorn", "\254";
+      "yuml", "\255";
+    ] ;;
+
+  let quick_etable =
+    let ht = Hashtbl.create 50 in
+    List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
+    (* Entities to be decoded, but that must not be encoded: *)
+    Hashtbl.add ht "apos" "'";      (* used in XML documents *)
+    ht ;;
+
+  let rev_etable =
+    (* rev_etable.(c): the entity reference that replaces character code c. *)
+    let a = Array.make 256 "" in
+    List.iter (fun (name,value) ->
+                 a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
+    (* Unnamed unsafe characters become numeric references "&#<code>;",
+     * the form that eref_re (and thus decode_to_latin1) recognizes.
+     *)
+    let numeric first last =
+      for i = first to last do
+        a.(i) <- "&#" ^ string_of_int i ^ ";"
+      done in
+    numeric 0 8;
+    numeric 11 12;
+    numeric 14 31;
+    numeric 127 159;
+    a ;;
+
+  (* decode_to_latin1 s: replaces the entity references in s by the Latin 1
+   * characters they denote. References that cannot be represented (unknown
+   * names, numbers above 255 or too large for int) are copied unchanged.
+   *)
+  let decode_to_latin1 s =
+    let expand r _ =
+      let whole = Str.matched_string r s in
+      match (try Some (Str.matched_group r 2 s) with Not_found -> None) with
+        Some digits ->
+          (* Numeric reference. int_of_string raises Failure when the
+           * number does not fit into an int; such a value is > 255 anyway.
+           *)
+          let code = (try int_of_string digits with Failure _ -> 256) in
+          if code >= 0 && code < 256 then
+            String.make 1 (Char.chr code)
+          else
+            whole
+      | None ->
+          (* Named reference: eref_re guarantees that group 3 matched. *)
+          let name =
+            (try Str.matched_group r 3 s with Not_found -> assert false) in
+          (try Hashtbl.find quick_etable name with Not_found -> whole)
+    in
+    Str.global_substitute eref_re expand s ;;
+
+  (* encode_from_latin1 s: replaces every character matched by unsafe_re
+   * by its entity reference from rev_etable; other characters are kept.
+   *)
+  let encode_from_latin1 s =
+    let protect r _ =
+      let found = Str.matched_string r s in
+      rev_etable.(Char.code found.[0])
+    in
+    Str.global_substitute unsafe_re protect s ;;
+end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.4 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.3 2000/03/03 17:03:16 gerd
+ * Q encoding: CR and LF are quoted.
+ *
+ * Revision 1.2 2000/03/03 01:08:29 gerd
+ * Added Netencoding.Html functions.
+ *
+ * Revision 1.1 2000/03/02 01:14:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netencoding.mli b/helm/DEVEL/pxp/netstring/netencoding.mli
new file mode 100644
index 000000000..6466572b3
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netencoding.mli
@@ -0,0 +1,271 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Several encodings important for the net *)
+(**********************************************************************)
+
+
+(**********************************************************************)
+(* Base 64 encoding *)
+(**********************************************************************)
+
+(* See RFC 2045 for a description of Base 64 encoding. *)
+
+(* THREAD-SAFETY:
+ * All Base64 functions are reentrant and thus thread-safe.
+ *)
+
+module Base64 : sig
+
+ val encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
+ string -> string
+ (* Compute the "base 64" encoding of the given string argument.
+ * Note that the result is a string that only contains the characters
+ * a-z, A-Z, 0-9, +, /, =, and optionally spaces, CR and LF characters.
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is encoded.
+ *
+ * The result is divided up into lines not longer than 'linelength'
+ * (without counting the line separator); default: do not divide lines.
+ * If 'linelength' is smaller than 4, no line division is performed.
+ * If 'linelength' is not divisible by 4, the produced lines are a
+ * bit shorter than 'linelength'.
+ *
+ * If 'crlf' (default: false) the lines are ended by CRLF; otherwise
+ * they are only ended by LF.
+ * (You need the crlf option to produce correct MIME messages.)
+ *
+ *)
+
+ val url_encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
+ string -> string
+ (* Same as 'encode' but use slightly different characters that can be
+ * part of URLs without additional encodings.
+ * The encoded string consists only of the characters a-z, A-Z, 0-9,
+ * -, /, .
+ * 'url_encode' does NOT implement the Base 64 encoding as described
+ * in the standard!
+ *)
+
+ val encode_substring : string -> pos:int -> len:int -> linelength:int ->
+ crlf:bool -> string
+ (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
+ *
+ * encode_substring s pos len linelen crlf:
+ * Encodes the substring at position 'pos' in 's' with length 'len'.
+ * The result is divided up into lines not longer than 'linelen' (without
+ * counting the line separator).
+ * If 'linelen' is smaller than 4, no line division is performed.
+ * If 'linelen' is not divisible by 4, the produced lines are a
+ * bit shorter than 'linelen'.
+ * If 'crlf' the lines are ended by CRLF; otherwise they are only
+ * ended by LF.
+ * (You need the crlf option to produce correct MIME messages.)
+ *)
+
+ val decode : ?pos:int -> ?len:int -> ?url_variant:bool ->
+ ?accept_spaces:bool -> string -> string
+ (* Decodes the given string argument.
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is decoded.
+ *
+ * If url_variant (default: true) is set, the function also
+ * accepts the characters '-' and '.' as produced by 'url_encode'.
+ *
+ * If accept_spaces (default: false) is set, the function ignores
+ * white space contained in the string to decode (otherwise the
+ * function fails if it finds white space).
+ *)
+
+ val decode_ignore_spaces : string -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+ *
+ * Decodes the string, too, but it is allowed that the string contains
+ * whitespace characters.
+ * This function is slower than 'decode'.
+ *)
+
+ val decode_substring : string -> pos:int -> len:int -> url_variant:bool ->
+ accept_spaces:bool -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+ *
+ * decode_substring s pos len url spaces:
+ * Decodes the substring of 's' beginning at 'pos' with length 'len'.
+ * If 'url', strings created by 'url_encode' are accepted, too.
+ * If 'spaces', whitespace characters are allowed in the string.
+ *)
+end
+
+(**********************************************************************)
+(* Quoted printable encoding *)
+(**********************************************************************)
+
+(* See RFC 2045.
+ * This implementation assumes that the encoded string has a text MIME
+ * type. Because of this, the characters CR and LF are never protected
+ * by hex tokens; they are copied literally to the output string.
+ *)
+
+(* THREAD-SAFETY:
+ * All QuotedPrintable functions are reentrant and thus thread-safe.
+ *)
+
+module QuotedPrintable :
+ sig
+ val encode : ?pos:int -> ?len:int -> string -> string
+ (* Encodes the string and returns it.
+ * Note line breaks:
+ * No additional soft line breaks are added. The characters CR
+ * and LF are not represented as =0D resp. =0A. (But other control
+ * characters ARE encoded.)
+ * Note unsafe characters:
+ * As recommended by RFC 2045, the characters !\"#$@[]^`{|}~
+ * are additionally represented as hex tokens. -- "
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is encoded.
+ *)
+
+ val encode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
+ * encode_substring s pos len:
+ * Encodes the substring of 's' beginning at 'pos' with length 'len'.
+ *)
+
+ val decode : ?pos:int -> ?len:int -> string -> string
+ (* Decodes the string and returns it.
+ * Most format errors cause an Invalid_argument exception.
+ * Note that soft line breaks can be properly decoded although
+ * 'encode' will never produce them.
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is decoded.
+ *)
+
+ val decode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+ * decode_substring s pos len:
+ * Decodes the substring of 's' beginning at 'pos' with length 'len'.
+ *)
+
+ end
+
+(**********************************************************************)
+(* Q encoding *)
+(**********************************************************************)
+
+(* See RFC 2047.
+ * The functions behave similar to those of QuotedPrintable.
+ *)
+
+(* THREAD-SAFETY:
+ * All Q functions are reentrant and thus thread-safe.
+ *)
+
+module Q :
+ sig
+ val encode : ?pos:int -> ?len:int -> string -> string
+ (* Note:
+ * All characters except alphanumeric characters are protected by
+ * hex tokens.
+ * In particular, spaces are represented as "=20", not as "_".
+ *)
+
+ val decode : ?pos:int -> ?len:int -> string -> string
+ (* Decodes "=XY" hex tokens (X, Y hex digits) and turns "_" into a space; other characters are copied. Raises Invalid_argument on a malformed or truncated "=" token. *)
+ val encode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** *)
+
+ val decode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** *)
+ end
+
+(**********************************************************************)
+(* B encoding *)
+(**********************************************************************)
+
+(* The B encoding of RFC 2047 is the same as Base64. *)
+
+
+(**********************************************************************)
+(* URL-encoding *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * According to: RFC 1738, RFC 1630
+ *)
+
+(* THREAD-SAFETY:
+ * The Url functions are thread-safe.
+ *)
+
+module Url :
+ sig
+ val decode : string -> string (* "+" becomes a space, "%XY" the character with hex code XY; raises Failure "Cgi.decode" if a "%" is not followed by two hex digits *)
+ val encode : string -> string (* a space becomes "+"; every other character that is not a letter, a digit or one of "$_.!*'(),-" becomes "%XY" with upper-case hex digits *)
+ end
+
+
+(**********************************************************************)
+(* HTMLization *)
+(**********************************************************************)
+
+(* Encodes characters that need protection by converting them to
+ * entity references. E.g. "<" is converted to "&lt;".
+ * As the entities may be named, there is a dependency on the character
+ * set. Currently, there are only functions for the Latin 1 alphabet.
+ *)
+
+(* THREAD-SAFETY:
+ * The Html functions are thread-safe.
+ *)
+
+module Html :
+ sig
+ val encode_from_latin1 : string -> string
+ (* Encodes the characters 0-8, 11-12, 14-31, '<', '>', '"', '&',
+ * 127-255 (codes 9, 10 and 13 are not encoded). If the characters have a name, a named entity is
+ * preferred over a numeric entity.
+ *)
+ val decode_to_latin1 : string -> string
+ (* Decodes the string. Unknown named entities are left as they
+ * are (i.e. decode_to_latin1 "&nonsense;" = "&nonsense;").
+ * The same applies to numeric entities greater than 255. "&apos;" is decoded, too, although encode_from_latin1 never produces it.
+ *)
+ end
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.3 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.2 2000/03/03 01:08:29 gerd
+ * Added Netencoding.Html functions.
+ *
+ * Revision 1.1 2000/03/02 01:14:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/nethtml.ml b/helm/DEVEL/pxp/netstring/nethtml.ml
new file mode 100644
index 000000000..7f9d983cd
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/nethtml.ml
@@ -0,0 +1,276 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Nethtml_scanner;;
+
+type document =
+ Element of (string * (string*string) list * document list)
+ | Data of string
+;;
+
+
+exception End_of_scan;;
+
+
+let no_end_tag = (* empty HTML elements: parse_document gives them no sub nodes and does not wait for an end tag *)
+ ref
+ [ "isindex";
+ "base";
+ "meta";
+ "link";
+ "hr";
+ "input";
+ "img";
+ "param";
+ "basefont";
+ "br";
+ "area";
+ ]
+;;
+
+
+let special_tag = (* other lexical rules: the content of these elements is read by scan_special as raw character data up to their own end tag *)
+ ref
+ [ "script";
+ "style";
+ ]
+;;
+
+
+let rec parse_comment buf =
+  (* Skips the rest of a comment: consumes comment tokens up to and
+   * including the first one that is not Mcomment.
+   * Raises End_of_scan if the input ends first.
+   *)
+  match scan_comment buf with
+    Eof      -> raise End_of_scan
+  | Mcomment -> parse_comment buf
+  | _        -> ()
+;;
+
+
+let rec parse_doctype buf =
+  (* Skips the rest of a doctype declaration: consumes doctype tokens up
+   * to and including the first one that is not Mdoctype.
+   * Raises End_of_scan if the input ends first.
+   *)
+  match scan_doctype buf with
+    Eof      -> raise End_of_scan
+  | Mdoctype -> parse_doctype buf
+  | _        -> ()
+;;
+
+
+let parse_document buf =
+ let current_name = ref "" in
+ let current_atts = ref [] in
+ let current_subs = ref [] in
+ let stack = Stack.create() in
+
+ let parse_atts() = (* Reads the attributes of a start tag up to and including ">"; returns (name, value) pairs with lower-case names. Raises End_of_scan at Eof. *)
+ let rec next_no_space() = (* next element token that is not white space *)
+ match scan_element buf with
+ Space _ -> next_no_space()
+ | t -> t
+ in
+
+ let rec parse_atts_lookahead next = (* next: the token already read but not yet processed *)
+ match next with
+ Relement -> []
+ | Name n ->
+ begin match next_no_space() with
+ Is ->
+ begin match next_no_space() with
+ Name v ->
+ (String.lowercase n, String.uppercase v) :: (* unquoted value: stored in upper case *)
+ parse_atts_lookahead (next_no_space())
+ | Literal v ->
+ (String.lowercase n,v) :: (* quoted value: stored as written *)
+ parse_atts_lookahead (next_no_space())
+ | Eof ->
+ raise End_of_scan
+ | Relement ->
+ (* Illegal *)
+ []
+ | _ ->
+ (* Illegal *)
+ parse_atts_lookahead (next_no_space())
+ end
+ | Eof ->
+ raise End_of_scan
+ | Relement ->
+ (* <x name> <==> <x name="name"> *)
+ [ String.lowercase n, String.lowercase n ]
+ | next' ->
+ (* assume <x name ...> <==> <x name="name" ...> *)
+ ( String.lowercase n, String.lowercase n ) ::
+ parse_atts_lookahead next'
+ end
+ | Eof ->
+ raise End_of_scan
+ | _ ->
+ (* Illegal *)
+ parse_atts_lookahead (next_no_space())
+ in
+ parse_atts_lookahead (next_no_space())
+ in
+
+ let rec parse_special name =
+   (* Collects the raw text of a special element up to its end tag
+    * </name> and returns it. Only the name part of the end tag is
+    * consumed here; the caller then skips up to the closing ">".
+    * name must be in lower case. Raises End_of_scan at end of input.
+    *)
+   match scan_special buf with
+     Lelementend n ->
+       (* End tags are case-insensitive; any other end tag is data. *)
+       if String.lowercase n = name then ""
+       else "</" ^ n ^ parse_special name
+   | Eof -> raise End_of_scan
+   | Cdata s ->
+       s ^ parse_special name
+   | _ -> parse_special name       (* Illegal *)
+ in
+
+ let rec skip_element() =
+   (* Discards element tokens up to and including the next ">".
+    * Raises End_of_scan if the input ends before a ">" is found.
+    * Nothing is returned.
+    *)
+   match scan_element buf with
+     Eof      -> raise End_of_scan
+   | Relement -> ()
+   | _        -> skip_element()
+ in
+
+ let rec parse_next() = (* Main loop: processes one document token per call; every branch either recurses or raises End_of_scan, so it never returns normally. *)
+ let t = scan_document buf in
+ match t with
+ Lcomment ->
+ parse_comment buf;
+ parse_next()
+ | Ldoctype ->
+ parse_doctype buf;
+ parse_next()
+ | Lelement name ->
+ let name = String.lowercase name in
+ if List.mem name !no_end_tag then begin (* empty element: added at once, without sub nodes *)
+ let atts = parse_atts() in
+ current_subs := (Element(name, atts, [])) :: !current_subs;
+ parse_next()
+ end
+ else if List.mem name !special_tag then begin (* special element: its content becomes a single Data node *)
+ let atts = parse_atts() in
+ let data = parse_special name in
+ (* Read until ">" *)
+ skip_element();
+ current_subs := (Element(name, atts, [Data data])) :: !current_subs;
+ parse_next()
+ end
+ else begin (* ordinary element: it becomes the current element, the previous one is saved on the stack *)
+ let atts = parse_atts() in
+ Stack.push (!current_name, !current_atts, !current_subs) stack;
+ current_name := name;
+ current_atts := atts;
+ current_subs := [];
+ parse_next()
+ end
+ | Cdata data ->
+ current_subs := (Data data) :: !current_subs; (* sub nodes are collected in reverse order *)
+ parse_next()
+ | Lelementend name ->
+ let name = String.lowercase name in
+ (* Read until ">" *)
+ skip_element();
+ (* Search the element to close on the stack: *)
+ let found = ref (name = !current_name) in
+ Stack.iter
+ (fun (old_name, _, _) ->
+ if name = old_name then found := true)
+ stack;
+ (* If not found, the end tag is wrong. Simply ignore it. *)
+ if not !found then
+ parse_next()
+ else begin
+ (* Put the current element on to the stack: *)
+ Stack.push (!current_name, !current_atts, !current_subs) stack;
+ (* If found: Remove the elements from the stack, and append
+ * them to the previous element as sub elements
+ *)
+ let rec remove() = (* pops until the element called name is reached; each popped inner element becomes a sub node of the one below it *)
+ let old_name, old_atts, old_subs = Stack.pop stack in
+ (* or raise Stack.Empty *)
+ if old_name = name then
+ old_name, old_atts, old_subs
+ else
+ let older_name, older_atts, older_subs = remove() in
+ older_name,
+ older_atts,
+ (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
+ in
+ let old_name, old_atts, old_subs = remove() in
+ (* Remove one more element: the element containing the element
+ * currently being closed.
+ *)
+ let new_name, new_atts, new_subs = Stack.pop stack in
+ current_name := new_name;
+ current_atts := new_atts;
+ current_subs := (Element (old_name, old_atts, List.rev old_subs))
+ :: new_subs;
+ (* Go on *)
+ parse_next()
+ end
+ | Eof ->
+ raise End_of_scan
+ | _ -> (* any other token is ignored *)
+ parse_next()
+ in
+ try
+ parse_next();
+ List.rev !current_subs
+ with
+ End_of_scan ->
+ (* Close all remaining elements: *)
+ Stack.push (!current_name, !current_atts, !current_subs) stack;
+ let rec remove() =
+ let old_name, old_atts, old_subs = Stack.pop stack in
+ (* or raise Stack.Empty *)
+ try
+ let older_name, older_atts, older_subs = remove() in
+ older_name,
+ older_atts,
+ (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
+ with
+ Stack.Empty ->
+ old_name, old_atts, old_subs
+ in
+ let name, atts, subs = remove() in
+ List.rev subs
+;;
+
+
+let parse_string s =
+  (* Parses the HTML text held in the string s. *)
+  parse_document (Lexing.from_string s)
+;;
+
+
+let parse_file fd =
+  (* Parses the HTML text read from the input channel fd. *)
+  parse_document (Lexing.from_channel fd)
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/nethtml.mli b/helm/DEVEL/pxp/netstring/nethtml.mli
new file mode 100644
index 000000000..d7af381cc
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/nethtml.mli
@@ -0,0 +1,72 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* The type 'document' represents parsed HTML documents.
+ * Element (name, args, subnodes): is an element node for an element of
+ * type 'name' (i.e. written <name ...>...</name>) with arguments 'args'
+ * and subnodes 'subnodes' (the material within the element). The arguments
+ * are simply name/value pairs. Entity references (something like &xy;)
+ * occurring in the values are NOT resolved.
+ * Arguments without values (e.g. <select multiple>: here,
+ * "multiple" is such an argument) are represented as (name,name), i.e. the
+ * name is returned as value.
+ * As argument names are case-insensitive, the names are all lowercase.
+ * Data s: is a character data node. Again, entity references are contained
+ * as such and not as what they mean.
+ *)
+
+type document =
+ Element of (string * (string*string) list * document list)
+ | Data of string
+;;
+
+
+val no_end_tag : string list ref;;
+ (* List of tags which are always empty. This variable is pre-configured,
+ * but you may want to change it.
+ * It is important to know which elements are always empty, because HTML
+ * allows the end tag to be omitted for them. For example,
+ * <a><b>x</a> is parsed as
+ * Element("a",[],[ Element("b",[],[]); Data "x" ])
+ * if we know that "b" is an empty element, but it is wrongly parsed as
+ * Element("a",[],[ Element("b",[], [ Data "x"]) ])
+ * if "b" is actually empty but we do not know it.
+ * An example of such a tag is "br".
+ *)
+
+val special_tag : string list ref;;
+ (* List of tags with a special rule for recognizing the end.
+ * This variable is pre-configured, but you may want to change it.
+ * The special rule is that the metacharacters '<', '>' and so on lose
+ * their meaning within the element, and that only the corresponding
+ * end tag stops this kind of scanning. An example is the element
+ * "javascript". Inner elements are not recognized, and the element
+ * can only be ended by </javascript>. (Other elements are also ended
+ * if an embracing element ends, e.g. "j" in <i><j></i>!)
+ *
+ * Note that comments are not recognized within special elements;
+ * comments are returned as character material.
+ *)
+
+val parse_string : string -> document list
+ (* Parses the HTML document from a string and returns it *)
+
+val parse_file : in_channel -> document list
+ (* Parses the HTML document from a file and returns it *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/nethtml_scanner.mll b/helm/DEVEL/pxp/netstring/nethtml_scanner.mll
new file mode 100644
index 000000000..03e6dea0e
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/nethtml_scanner.mll
@@ -0,0 +1,128 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+{
+ type token =
+ Lcomment
+ | Rcomment
+ | Mcomment
+ | Ldoctype
+ | Rdoctype
+ | Mdoctype
+ | Lelement of string
+ | Lelementend of string
+ | Relement
+ | Cdata of string
+ | Space of int
+ | Name of string
+ | Is
+ | Literal of string
+ | Other
+ | Eof
+}
+
+(* Simplified rules: Only Latin-1 is recognized as character set *)
+
+let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
+let extender = '\183'
+let digit = ['0'-'9']
+let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
+let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
+let name = ( letter | '_' | ':' ) namechar*
+let nmtoken = namechar+
+let ws = [ ' ' '\t' '\r' '\n' ]
+let string_literal1 = '"' [^ '"' '>' '<' '\n']* '"'
+let string_literal2 = "'" [^ '\'' '>' '<' '\n']* "'"
+
+
+(* This following rules reflect HTML as it is used, not the SGML
+ * rules.
+ *)
+
+rule scan_document = parse
+  | "<!--"
+      { Lcomment }
+  | "<!"
+      { Ldoctype }
+  | "<" name
+      { let s = Lexing.lexeme lexbuf in
+        Lelement (String.sub s 1 (String.length s - 1))
+      }
+  | "</" name
+      { let s = Lexing.lexeme lexbuf in
+        Lelementend (String.sub s 2 (String.length s - 2))
+      }
+  | "<"                (* misplaced "<" *)
+      { Cdata "<" }
+  | eof
+      { Eof }
+  | [^ '<' ]+
+      { Cdata (Lexing.lexeme lexbuf) }
+
+(* Inside special elements only end tags are recognized; the rest is data. *)
+and scan_special = parse
+  | "</" name
+      { let s = Lexing.lexeme lexbuf in
+        Lelementend (String.sub s 2 (String.length s - 2))
+      }
+  | "<"
+      { Cdata "<" }
+  | eof
+      { Eof }
+  | [^ '<' ]+
+      { Cdata (Lexing.lexeme lexbuf) }
+
+and scan_comment = parse
+  | "-->"
+      { Rcomment }
+  | "-"
+      { Mcomment }
+  | eof
+      { Eof }
+  | [^ '-']+
+      { Mcomment }
+
+and scan_doctype = parse
+  | ">"                (* Occurrence in strings, and [ ] brackets ignored *)
+      { Rdoctype }
+  | eof
+      { Eof }
+  | [^ '>' ] +
+      { Mdoctype }
+
+and scan_element = parse
+  | ">"
+      { Relement }
+  | ws+
+      { Space (String.length (Lexing.lexeme lexbuf)) }
+  | name
+      { Name (Lexing.lexeme lexbuf) }
+  | "="
+      { Is }
+  | string_literal1
+      { let s = Lexing.lexeme lexbuf in
+        Literal (String.sub s 1 (String.length s - 2))
+      }
+  | string_literal2
+      { let s = Lexing.lexeme lexbuf in
+        Literal (String.sub s 1 (String.length s - 2))
+      }
+  | eof
+      { Eof }
+  | _
+      { Other }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netmappings.ml b/helm/DEVEL/pxp/netstring/netmappings.ml
new file mode 100644
index 000000000..4821350ae
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netmappings.ml
@@ -0,0 +1,38 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+type from_uni_list =
+ U_nil
+ | U_single of (int*int)
+ | U_list of (int*int) list
+;;
+
+let to_unicode = Hashtbl.create 50;; (* per encoding: lazy array mapping each 8 bit code to Unicode; see netmappings.mli *)
+
+let from_unicode = Hashtbl.create 50;; (* per encoding: lazy array of (unicode, 8 bit code) lists, indexed by unicode land 255; see netmappings.mli *)
+
+(* Hooks run by lock/unlock; they do nothing until init_mt replaces them. *)
+let lock_hook = ref (fun () -> ());;
+let unlock_hook = ref (fun () -> ());;
+let lock () = (!lock_hook) ();;
+let unlock () = (!unlock_hook) ();;
+
+(* init_mt l u: from now on lock calls l and unlock calls u. *)
+let init_mt new_f_lock new_f_unlock =
+  lock_hook := new_f_lock; unlock_hook := new_f_unlock
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/08/28 23:17:54 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netmappings.mli b/helm/DEVEL/pxp/netstring/netmappings.mli
new file mode 100644
index 000000000..1c52d0729
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netmappings.mli
@@ -0,0 +1,115 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+type from_uni_list =
+ U_nil
+ | U_single of (int*int)
+ | U_list of (int*int) list
+;;
+ (* A representation of (int*int) list that is optimized for the case that
+ * lists with 0 and 1 elements are the most frequent cases.
+ *)
+
+
+val to_unicode : (Netconversion.encoding,
+ int array Lazy.t) Hashtbl.t;;
+
+val from_unicode : (Netconversion.encoding,
+ from_uni_list array Lazy.t) Hashtbl.t;;
+ (* These hashtables are used internally by the parser to store
+ * the conversion tables from 8 bit encodings to Unicode and vice versa.
+ * It is normally not necessary to access these tables; the
+ * Netconversion module does it already for you.
+ *
+ * Specification of the conversion tables:
+ *
+ * to_unicode: maps an 8 bit code to Unicode, i.e.
+ * let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
+ * let unicode = m.(isocode)
+ * - This may be (-1) to indicate that the code point is not defined.
+ *
+ * from_unicode: maps Unicode to an 8 bit code, i.e.
+ * let m = Lazy.force (Hashtbl.find from_unicode `Enc_isoXXX) in
+ * let l = m.(unicode land 255)
+ * Now search in l the pair (unicode, isocode), and return isocode.
+ *
+ * Note: It is guaranteed that both arrays have always 256 elements.
+ *)
+
+val lock : unit -> unit
+ (* In multi-threaded applications: obtains a lock which is required to
+ * Lazy.force the values found in to_unicode and from_unicode.
+ * In single-threaded applications: a NO-OP
+ *)
+
+val unlock : unit -> unit
+ (* In multi-threaded applications: releases the lock which is required to
+ * Lazy.force the values found in to_unicode and from_unicode.
+ * In single-threaded applications: a NO-OP
+ *)
+
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+ (* Internally used; see netstring_mt.ml *)
+
+
+(* ---------------------------------------- *)
+
+(* The following comment was written when the conversion module belonged
+ * to the PXP package (Polymorphic XML Parser).
+ *)
+
+(* HOW TO ADD A NEW 8 BIT CODE:
+ *
+ * It is relatively simple to add a new 8 bit code to the system. This
+ * means that the parser can read and write files with the new encoding;
+ * this does not mean that the parser can represent the XML tree internally
+ * by the new encoding.
+ *
+ * - Put a new unimap file into the "mappings" directory. The file format
+ * is simple; please look at the already existing files.
+ * The name of the file determines the internal name of the code:
+ * If the file is called <name>.unimap, the code will be called
+ * `Enc_<name>.
+ *
+ * - Extend the type "encoding" in pxp_types.mli and pxp_types.ml
+ *
+ * - Extend the two functions encoding_of_string and string_of_encoding
+ * in pxp_types.ml
+ *
+ * - Recompile the parser
+ *
+ * Every encoding consumes at least 3kB of memory, but this may be much more
+ * if the code points are dispersed on the Unicode code space.
+ *
+ * Perhaps the addition of new codes will become even simpler in future
+ * versions of PXP; but it is currently more important to support
+ * non-8-bit codes, too.
+ * Every contribution of new codes to PXP is welcome!
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/29 00:47:24 gerd
+ * New type for the conversion Unicode to 8bit.
+ * Conversion tables are now lazy. Thus also mutexes are required.
+ *
+ * Revision 1.1 2000/08/13 00:02:57 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_mappings.mli):
+ *
+ * Revision 1.1 2000/07/27 00:40:02 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netmappings_iso.ml b/helm/DEVEL/pxp/netstring/netmappings_iso.ml
new file mode 100644
index 000000000..9b86aaefd
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netmappings_iso.ml
@@ -0,0 +1,54 @@
+(* WARNING! This is a generated file! *)
+let iso88591_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\
000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
+let iso88591_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144
\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191
\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144
\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso885910_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\001\018\001\001\"\001\001*\001\001(\001\0016\001\000\167\001\001;\001\001\016\001\001`\001\001f\001\001}\001\000\173\001\001j\001\001J\001\000\176\001\001\005\001\001\019\001\001#\001\001+\001\001)\001\0017\001\000\183\001\001<\001\001\017\001\001a\001\001g\001\001~\001 \021\001\001k\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\000\207\001\000\208\001\001E\001\001L\001\000\211\001\000\212\001\000\213\001\000\214\001\001h\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\000\239\001\000\240\001\001F\001\001M\001\000\243\001\000\244\001\000\245\001\000\246\001\001i\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\0018" 0 : int 
array);;
+let iso885910_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\015\000\000\000\000\000\000\006\185\000\000\006\185\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\192@\145\160\160AA\160\160\001\001\001\001\000\224@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\169@\145\160\160QQ\160\160\001\001\017\001\000\185@\145\160\160RR\160\160\001\001\018\001\000\162@\145\160\160SS\160\160\001\001\019\001\000\178@\144\160TT\145\160\160UU\160\160\001 \021\001\000\189@\145\160\160VV\160\160\001\001\022\001\000\204@\145\160\160WW\160\160\001\001\023\001\000\236@\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\163@\145\160\160cc\160\160\001\001#\001\000\179@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\164@\145\160\160kk\160\160\001\001+\001\000\180@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\166@\145\160\160ww\160\160\001\0017\001\000\182@\145\160\160xx\160\160\001\0018\001\000\255@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\168@\145\160\160||\160\160\001\001<\001\000\184@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\145\160\160\000E\000E\160\160\001\001E\001\000\209@\145\160\160\000F\000F\160\160\001\001F\001\000\241@\144\1
60\000G\000G\144\160\000H\000H\144\160\000I\000I\145\160\160\000J\000J\160\160\001\001J\001\000\175@\145\160\160\000K\000K\160\160\001\001K\001\000\191@\145\160\160\000L\000L\160\160\001\001L\001\000\210@\145\160\160\000M\000M\160\160\001\001M\001\000\242@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\170@\145\160\160\000a\000a\160\160\001\001a\001\000\186@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\171@\145\160\160\000g\000g\160\160\001\001g\001\000\187@\145\160\160\000h\000h\160\160\001\001h\001\000\215@\145\160\160\000i\000i\160\160\001\001i\001\000\247@\145\160\160\000j\000j\160\160\001\001j\001\000\174@\145\160\160\000k\000k\160\160\001\001k\001\000\190@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\172@\145\160\160\000~\000~\160\160\001\001~\001\000\188@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001
\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\167@@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@@@@@@\144\160\001\000\183\001\000\183@@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214@\144\160\001\000\216\001\000\216@\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230@@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\
245\001\000\245\144\160\001\000\246\001\000\246@\144\160\001\000\248\001\000\248@\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254@" 0 : Netmappings.from_uni_list array);;
+ let iso885913_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001 \029\001\000\162\001\000\163\001\000\164\001 \030\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001 \028\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001 \025" 0 : int array);;
+let iso885913_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\031\000\000\000\000\000\000\006\206\000\000\006\206\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\194@\145\160\160AA\160\160\001\001\001\001\000\226@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\192@\145\160\160EE\160\160\001\001\005\001\000\224@\145\160\160FF\160\160\001\001\006\001\000\195@\145\160\160GG\160\160\001\001\007\001\000\227@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\144\160PP\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\199@\145\160\160SS\160\160\001\001\019\001\000\231@\144\160TT\144\160UU\145\160\160VV\160\160\001\001\022\001\000\203@\145\160\160WW\160\160\001\001\023\001\000\235@\145\160\160XX\160\160\001\001\024\001\000\198@\145\160\160YY\160\160\001\001\025\001\000\230\160\160\001 \025\001\000\255@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\180@\145\160\160]]\160\160\001 \029\001\000\161@\145\160\160^^\160\160\001 
\030\001\000\165@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\217@\145\160\160\000B\000B\160\160\001\001B\001\000\249@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\145\160\160\000E\000E\160\160\001\001E\001\000\210@\145\160\160\000F\000F\160\160\001\001F\001\000\242@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\212@\145\160\160\000M\000M\160\160\001\001M\001\000\244@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\170@\145\160\160\000W\000W\160\160\001\001W\001\000\186@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\218@\145\160\160\000[\000[\160\160\001\001[\001\000\250@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\00
0k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~\001\000\254@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\0
00\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\175@@\144\160\001\000\201\001\000\201@@@@@@@@@\144\160\001\000\211\001\000\211@\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\168@@@\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@@@@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\191@@\144\160\001\000\233\001\000\233@@@@@@@@@\144\160\001\000\243\001\000\243@\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\184@@@\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let iso885914_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\030\002\001\030\003\001\000\163\001\001\n\001\001\011\001\030\n\001\000\167\001\030\128\001\000\169\001\030\130\001\030\011\001\030\242\001\000\173\001\000\174\001\001x\001\030\030\001\030\031\001\001 
\001\001!\001\030@\001\030A\001\000\182\001\030V\001\030\129\001\030W\001\030\131\001\030`\001\030\243\001\030\132\001\030\133\001\030a\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001t\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\030j\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\001v\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001u\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\030k\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\001w\001\000\255" 0 : int array);;
+let iso885914_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\222\000\000\000\000\000\000\006w\000\000\006w\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\030\002\001\000\161@\145\160\160CC\160\160\001\030\003\001\000\162@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\145\160\160JJ\160\160\001\001\n\001\000\164\160\160\001\030\n\001\000\166@\145\160\160KK\160\160\001\001\011\001\000\165\160\160\001\030\011\001\000\171@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\030\030\001\000\176@\145\160\160__\160\160\001\030\031\001\000\177@\145\160\160``\160\160\001\001 \001\000\178@\145\160\160aa\160\160\001\001!\001\000\179@\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\030@\001\000\180@\145\160\160\000A\000A\160\160\001\030A\001\000\181@\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\030V\001\000\183@\145\160\160\000W\000W\160\160\001\030W\001\000\185@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\030`\001\000\187@\145\160\160\000a\000a\160\160\001\030a\001\000\191@\144\160\000b\000b\144\160\000c\0
00c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\030j\001\000\215@\145\160\160\000k\000k\160\160\001\030k\001\000\247@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\145\160\160\000t\000t\160\160\001\001t\001\000\208@\145\160\160\000u\000u\160\160\001\001u\001\000\240@\145\160\160\000v\000v\160\160\001\001v\001\000\222@\145\160\160\000w\000w\160\160\001\001w\001\000\254@\145\160\160\000x\000x\160\160\001\001x\001\000\175@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\145\160\160\001\000\128\001\000\128\160\160\001\030\128\001\000\168@\145\160\160\001\000\129\001\000\129\160\160\001\030\129\001\000\184@\145\160\160\001\000\130\001\000\130\160\160\001\030\130\001\000\170@\145\160\160\001\000\131\001\000\131\160\160\001\030\131\001\000\186@\145\160\160\001\000\132\001\000\132\160\160\001\030\132\001\000\189@\145\160\160\001\000\133\001\000\133\160\160\001\030\133\001\000\190@\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\14
4\160\001\000\160\001\000\160@@\144\160\001\000\163\001\000\163@@@\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@@@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@@@@@@@\144\160\001\000\182\001\000\182@@@@@@@@@\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214@\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\145\160\160\001\030\242\001\000\172\160\160\001\000\242\001\000\242@\145\160\160\001\030\243\001\000\188\160\160\001\000\243\001\000\243@\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246@\144\160\001\000\248\001\000\248\144\160\00
1\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso885915_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001 \172\001\000\165\001\001`\001\000\167\001\001a\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\001}\001\000\181\001\000\182\001\000\183\001\001~\001\000\185\001\000\186\001\000\187\001\001R\001\001S\001\001x\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\00
1\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
+let iso885915_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\157\000\000\000\000\000\000\006!\000\000\006!\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\188@\145\160\160\000S\000S\160\160\001\001S\001\000\189@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\166@\145\160\160\000a\000a\160\160\001\001a\001\000\168@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\190@\14
4\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\180@\145\160\160\000~\000~\160\160\001\001~\001\000\184@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\164\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\0
00\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso88592_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\002\216\001\001A\001\000\164\001\001=\001\001Z\001\000\167\001\000\168\001\001`\001\001^\001\001d\001\001y\001\000\173\001\001}\001\001{\001\000\176\001\001\005\001\002\219\001\001B\001\000\180\001\001>\001\001[\001\002\199\001\000\184\001\001a\001\001_\001\001e\001\001z\001\002\221\001\001~\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
+let iso88592_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\217\000\000\006\217\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\001\003\001\000\227@\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\145\160\160FF\160\160\001\001\006\001\000\198@\145\160\160GG\160\160\001\001\007\001\000\230@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\145\160\160NN\160\160\001\001\014\001\000\207@\145\160\160OO\160\160\001\001\015\001\000\239@\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\145\160\160ZZ\160\160\001\001\026\001\000\204@\145\160\160[[\160\160\001\001\027\001\000\236@\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\165@\145\160\160~~\160\160\001\001>\001\000\181@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\163@\145\160\160\000B\000B\160\160\001\001B\001\000\179@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\210@\145\160\160\000H\000H\160\160\001\001H\001\000\242@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\0
00N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\213@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\245@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001\001T\001\000\192@\145\160\160\000U\000U\160\160\001\001U\001\000\224@\144\160\000V\000V\144\160\000W\000W\145\160\160\000X\000X\160\160\001\001X\001\000\216@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\248@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\166@\145\160\160\000[\000[\160\160\001\001[\001\000\182@\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\145\160\160\000`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\171@\145\160\160\000e\000e\160\160\001\001e\001\000\187@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\172@\145\160\160\000z\000z\160\160\001\001z\001\000\188@\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160\160\001\001~\001\000\190@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\13
4\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@@@\144\160\001\000\180\001\000\180@@@\144\160\001\000\184\001\000\184@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\183\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\
000\237\144\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
+ let iso88593_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001&\001\002\216\001\000\163\001\000\164\000\255\001\001$\001\000\167\001\000\168\001\0010\001\001^\001\001\030\001\0014\001\000\173\000\255\001\001{\001\000\176\001\001'\001\000\178\001\000\179\001\000\180\001\000\181\001\001%\001\000\183\001\000\184\001\0011\001\001_\001\001\031\001\0015\001\000\189\000\255\001\001|\001\000\192\001\000\193\001\000\194\000\255\001\000\196\001\001\n\001\001\008\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\000\255\001\000\209\001\000\210\001\000\211\001\000\212\001\001 \001\000\214\001\000\215\001\001\028\001\000\217\001\000\218\001\000\219\001\000\220\001\001l\001\001\\\001\000\223\001\000\224\001\000\225\001\000\226\000\255\001\000\228\001\001\011\001\001\t\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\000\255\001\000\241\001\000\242\001\000\243\001\000\244\001\001!\001\000\246\001\000\247\001\001\029\001\000\249\001\000\250\001\000\251\001\000\252\001\001m\001\001]\001\002\217" 0 
: int array);;
+let iso88593_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\165\000\000\000\000\000\000\006J\000\000\006J\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\145\160\160HH\160\160\001\001\008\001\000\198@\145\160\160II\160\160\001\001\t\001\000\230@\145\160\160JJ\160\160\001\001\n\001\000\197@\145\160\160KK\160\160\001\001\011\001\000\229@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001\001\028\001\000\216@\145\160\160]]\160\160\001\001\029\001\000\248@\145\160\160^^\160\160\001\001\030\001\000\171@\145\160\160__\160\160\001\001\031\001\000\187@\145\160\160``\160\160\001\001 \001\000\213@\145\160\160aa\160\160\001\001!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001\001$\001\000\166@\145\160\160ee\160\160\001\001%\001\000\182@\145\160\160ff\160\160\001\001&\001\000\161@\145\160\160gg\160\160\001\001'\001\000\177@\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\169@\145\160\160qq\160\160\001\0011\001\000\185@\144\160rr\144\160ss\145\160\160tt\160\160\001\0014\001\000\172@\145\160\160uu\160\160\001\0015\001\000\188@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\145\160\160\000\\\000\\\160\160\001\001\\\001\000\222@\145\160\16
0\000]\000]\160\160\001\001]\001\000\254@\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001\001l\001\000\221@\145\160\160\000m\000m\160\160\001\001m\001\000\253@\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@@\144\160\001\
000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181@\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@@@\144\160\001\000\189\001\000\189@@\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\145\160\160\001\000\217\001\000\217\160\160\001\002\217\001\000\255@\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88594_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\0018\001\001V\001\000\164\001\001(\001\001;\001\000\167\001\000\168\001\001`\001\001\018\001\001\"\001\001f\001\000\173\001\001}\001\000\175\001\000\176\001\001\005\001\002\219\001\001W\001\000\180\001\001)\001\001<\001\002\199\001\000\184\001\001a\001\001\019\001\001#\001\001g\001\001J\001\001~\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\001*\001\001\016\001\001E\001\001L\001\0016\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\001h\001\001j\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\001+\001\001\017\001\001F\001\001M\001\0017\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\001i\001\001k\001\002\217" 0 : int array);;
+let iso88594_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\021\000\000\000\000\000\000\006\193\000\000\006\193\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\192@\145\160\160AA\160\160\001\001\001\001\000\224@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\145\160\160RR\160\160\001\001\018\001\000\170@\145\160\160SS\160\160\001\001\019\001\000\186@\144\160TT\144\160UU\145\160\160VV\160\160\001\001\022\001\000\204@\145\160\160WW\160\160\001\001\023\001\000\236@\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\171@\145\160\160cc\160\160\001\001#\001\000\187@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\207@\145\160\160kk\160\160\001\001+\001\000\239@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\211@\145\160\160ww\160\160\001\0017\001\000\243@\145\160\160xx\160\160\001\0018\001\000\162@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\166@\145\160\160||\160\160\001\001<\001\000\182@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\145\160\160\000E\000E\160\160\001\001E\001\000\209@\145\160\160\000F\000F\160\160\001\001F\001\000\241@\144\160\000G\000G\144\160\000H\000H\144\
160\000I\000I\145\160\160\000J\000J\160\160\001\001J\001\000\189@\145\160\160\000K\000K\160\160\001\001K\001\000\191@\145\160\160\000L\000L\160\160\001\001L\001\000\210@\145\160\160\000M\000M\160\160\001\001M\001\000\242@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\163@\145\160\160\000W\000W\160\160\001\001W\001\000\179@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\172@\145\160\160\000g\000g\160\160\001\001g\001\000\188@\145\160\160\000h\000h\160\160\001\001h\001\000\221@\145\160\160\000i\000i\160\160\001\001i\001\000\253@\145\160\160\000j\000j\160\160\001\001j\001\000\222@\145\160\160\000k\000k\160\160\001\001k\001\000\254@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160\160\001\001~\001\000\190@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\00
1\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176@@@\144\160\001\000\180\001\000\180@@@\144\160\001\000\184\001\000\184@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\002\199\001\000\183@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@@\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\145\160\160\001\002\219\001\000\178\160\160\001\000\219\001\000\219@\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230@@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@@\144\160\0
01\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248@\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88595_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\004\001\001\004\002\001\004\003\001\004\004\001\004\005\001\004\006\001\004\007\001\004\008\001\004\t\001\004\n\001\004\011\001\004\012\001\000\173\001\004\014\001\004\015\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001!\022\001\004Q\001\004R\001\004S\001\004T\001\004U\001\004V\001\004W\001\004X\001\004Y\001\004Z\001\004[\001\004\\\001\000\167\001\004^\001\004_" 0 : int array);;
+let iso88595_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\154\000\000\000\000\000\000\007r\000\000\007r\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\161@\145\160\160BB\160\160\001\004\002\001\000\162@\145\160\160CC\160\160\001\004\003\001\000\163@\145\160\160DD\160\160\001\004\004\001\000\164@\145\160\160EE\160\160\001\004\005\001\000\165@\145\160\160FF\160\160\001\004\006\001\000\166@\145\160\160GG\160\160\001\004\007\001\000\167@\145\160\160HH\160\160\001\004\008\001\000\168@\145\160\160II\160\160\001\004\t\001\000\169@\145\160\160JJ\160\160\001\004\n\001\000\170@\145\160\160KK\160\160\001\004\011\001\000\171@\145\160\160LL\160\160\001\004\012\001\000\172@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\174@\145\160\160OO\160\160\001\004\015\001\000\175@\145\160\160PP\160\160\001\004\016\001\000\176@\145\160\160QQ\160\160\001\004\017\001\000\177@\145\160\160RR\160\160\001\004\018\001\000\178@\145\160\160SS\160\160\001\004\019\001\000\179@\145\160\160TT\160\160\001\004\020\001\000\180@\145\160\160UU\160\160\001\004\021\001\000\181@\145\160\160VV\160\160\001\004\022\001\000\182\160\160\001!\022\001\000\240@\145\160\160WW\160\160\001\004\023\001\000\183@\145\160\160XX\160\160\001\004\024\001\000\184@\145\160\160YY\160\160\001\004\025\001\000\185@\145\160\160ZZ\160\160\001\004\026\001\000\186@\145\160\160[[\160\160\001\004\027\001\000\187@\145\160\160\\\\\160\160\001\004\028\001\000\188@\145\160\160]]\160\160\001\004\029\001\000\189@\145\160\160^^\160\160\001\004\030\001\000\190@\145\160\160__\160\160\001\004\031\001\000\191@\145\160\160``\160\160\001\004 
\001\000\192@\145\160\160aa\160\160\001\004!\001\000\193@\145\160\160bb\160\160\001\004\"\001\000\194@\145\160\160cc\160\160\001\004#\001\000\195@\145\160\160dd\160\160\001\004$\001\000\196@\145\160\160ee\160\160\001\004%\001\000\197@\145\160\160ff\160\160\001\004&\001\000\198@\145\160\160gg\160\160\001\004'\001\000\199@\145\160\160hh\160\160\001\004(\001\000\200@\145\160\160ii\160\160\001\004)\001\000\201@\145\160\160jj\160\160\001\004*\001\000\202@\145\160\160kk\160\160\001\004+\001\000\203@\145\160\160ll\160\160\001\004,\001\000\204@\145\160\160mm\160\160\001\004-\001\000\205@\145\160\160nn\160\160\001\004.\001\000\206@\145\160\160oo\160\160\001\004/\001\000\207@\145\160\160pp\160\160\001\0040\001\000\208@\145\160\160qq\160\160\001\0041\001\000\209@\145\160\160rr\160\160\001\0042\001\000\210@\145\160\160ss\160\160\001\0043\001\000\211@\145\160\160tt\160\160\001\0044\001\000\212@\145\160\160uu\160\160\001\0045\001\000\213@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\215@\145\160\160xx\160\160\001\0048\001\000\216@\145\160\160yy\160\160\001\0049\001\000\217@\145\160\160zz\160\160\001\004:\001\000\218@\145\160\160{{\160\160\001\004;\001\000\219@\145\160\160||\160\160\001\004<\001\000\220@\145\160\160}}\160\160\001\004=\001\000\221@\145\160\160~~\160\160\001\004>\001\000\222@\145\160\160\127\127\160\160\001\004?\001\000\223@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\000\233@\145\160\160\000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\
000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001\004R\001\000\242@\145\160\160\000S\000S\160\160\001\004S\001\000\243@\145\160\160\000T\000T\160\160\001\004T\001\000\244@\145\160\160\000U\000U\160\160\001\004U\001\000\245@\145\160\160\000V\000V\160\160\001\004V\001\000\246@\145\160\160\000W\000W\160\160\001\004W\001\000\247@\145\160\160\000X\000X\160\160\001\004X\001\000\248@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\250@\145\160\160\000[\000[\160\160\001\004[\001\000\251@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\252@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\254@\145\160\160\000_\000_\160\160\001\004_\001\000\255@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\14
1\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\253@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88596_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\000\255\000\255\000\255\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\006\012\001\000\173\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\006\027\000\255\000\255\000\255\001\006\031\000\255\001\006!\001\006\"\001\006#\001\006$\001\006%\001\006&\001\006'\001\006(\001\006)\001\006*\001\006+\001\006,\001\006-\001\006.\001\006/\001\0060\001\0061\001\0062\001\0063\001\0064\001\0065\001\0066\001\0067\001\0068\001\0069\001\006:\000\255\000\255\000\255\000\255\000\255\001\006@\001\006A\001\006B\001\006C\001\006D\001\006E\001\006F\001\006G\001\006H\001\006I\001\006J\001\006K\001\006L\001\006M\001\006N\001\006O\001\006P\001\006Q\001\006R\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255" 0 : int array);;
+let iso88596_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\218\000\000\000\000\000\000\005\224\000\000\005\224\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\145\160\160[[\160\160\001\006\027\001\000\187@\144\160\\\\\144\160]]\144\160^^\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\145\160\160aa\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\215@\145\160\160xx\160\160\001\0068\001\000\216@\145\160\160yy\160\160\001\0069\001\000\217@\145\160\160zz\160\160\001\006:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\145\160\160\000A\000A\160\160\001\006A\001\000\225@\145\160\160\000B\000B\160\160\001\006B\001\000\226@\145\160\160\000C\000C\160\160\001\006C\001\
000\227@\145\160\160\000D\000D\160\160\001\006D\001\000\228@\145\160\160\000E\000E\160\160\001\006E\001\000\229@\145\160\160\000F\000F\160\160\001\006F\001\000\230@\145\160\160\000G\000G\160\160\001\006G\001\000\231@\145\160\160\000H\000H\160\160\001\006H\001\000\232@\145\160\160\000I\000I\160\160\001\006I\001\000\233@\145\160\160\000J\000J\160\160\001\006J\001\000\234@\145\160\160\000K\000K\160\160\001\006K\001\000\235@\145\160\160\000L\000L\160\160\001\006L\001\000\236@\145\160\160\000M\000M\160\160\001\006M\001\000\237@\145\160\160\000N\000N\160\160\001\006N\001\000\238@\145\160\160\000O\000O\160\160\001\006O\001\000\239@\145\160\160\000P\000P\160\160\001\006P\001\000\240@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\145\160\160\000R\000R\160\160\001\006R\001\000\242@\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\0
01\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@@@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88597_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002?\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001 \024\001 \025\001\000\163\000\255\000\255\001\000\166\001\000\167\001\000\168\001\000\169\000\255\001\000\171\001\000\172\001\000\173\000\255\001 
\021\001\000\176\001\000\177\001\000\178\001\000\179\001\003\132\001\003\133\001\003\134\001\000\183\001\003\136\001\003\137\001\003\138\001\000\187\001\003\140\001\000\189\001\003\142\001\003\143\001\003\144\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\000\255\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\001\003\172\001\003\173\001\003\174\001\003\175\001\003\176\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\194\001\003\195\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001\003\201\001\003\202\001\003\203\001\003\204\001\003\205\001\003\206\000\255" 0 : int array);;
+let iso88597_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\223\000\000\000\000\000\000\006\147\000\000\006\147\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\161@\145\160\160YY\160\160\001 \025\001\000\162@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000
|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\145\160\160\001\000\132\001\000\132\160\160\001\003\132\001\000\180@\145\160\160\001\000\133\001\000\133\160\160\001\003\133\001\000\181@\145\160\160\001\000\134\001\000\134\160\160\001\003\134\001\000\182@\144\160\001\000\135\001\000\135\145\160\160\001\000\136\001\000\136\160\160\001\003\136\001\000\184@\145\160\160\001\000\137\001\000\137\160\160\001\003\137\001\000\185@\145\160\160\001\000\138\001\000\138\160\160\001\003\138\001\000\186@\144\160\001\000\139\001\000\139\145\160\160\001\000\140\001\000\140\160\160\001\003\140\001\000\188@\144\160\001\000\141\001\000\141\145\160\160\001\000\142\001\000\142\160\160\001\003\142\001\000\190@\145\160\160\001\000\143\001\000\143\160\160\001\003\143\001\000\191@\145\160\160\001\000\144\001\000\144\160\160\001\003\144\001\000\192@\145\160\160\001\000\145\001\000\145\160\160\001\003\145\001\000\193@\145\160\160\001\000\146\001\000\146\160\160\001\003\146\001\000\194@\145\160\160\001\000\147\001\000\147\160\160\001\003\147\001\000\195@\145\160\160\001\000\148\001\000\148\160\160\001\003\148\001\000\196@\145\160\160\001\000\149\001\000\149\160\160\001\003\149\001\000\197@\145\160\160\001\000\150\001\000\150\160\160\001\003\150\001\000\198@\145\160\160\001\000\151\001\000\151\160\160\001\003\151\001\000\199@\145\160\160\001\000\152\001\000\152\160\160\001\003\152\001\000\200@\145\160\160\001\000\153\001\000\153\160\160\001\003\153\001\000\201@\145\160\160\001\000\154\001\000\154\160\160\001\003\154\001\000\202@\145\160\160\001\000\155\001\000\155\160\160\001\003\155\001\000\203@\145\160\160\001\000\156\001\000\156\160\160\001\003\156\001\000\204@\145\160\160\001\000\157\001\000\157\160\160\001\003\157\001\000\205@\145\160\160\001\000\158\001\000\158\160\160\001\003\158\001\000\206@\145\160\160\001\000\159\001\000\159\160\160\001\003\1
59\001\000\207@\145\160\160\001\000\160\001\000\160\160\160\001\003\160\001\000\208@\144\160\001\003\161\001\000\209@\145\160\160\001\000\163\001\000\163\160\160\001\003\163\001\000\211@\144\160\001\003\164\001\000\212\144\160\001\003\165\001\000\213\145\160\160\001\000\166\001\000\166\160\160\001\003\166\001\000\214@\145\160\160\001\000\167\001\000\167\160\160\001\003\167\001\000\215@\145\160\160\001\000\168\001\000\168\160\160\001\003\168\001\000\216@\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\217@\144\160\001\003\170\001\000\218\145\160\160\001\000\171\001\000\171\160\160\001\003\171\001\000\219@\145\160\160\001\000\172\001\000\172\160\160\001\003\172\001\000\220@\145\160\160\001\000\173\001\000\173\160\160\001\003\173\001\000\221@\144\160\001\003\174\001\000\222\144\160\001\003\175\001\000\223\145\160\160\001\000\176\001\000\176\160\160\001\003\176\001\000\224@\145\160\160\001\000\177\001\000\177\160\160\001\003\177\001\000\225@\145\160\160\001\000\178\001\000\178\160\160\001\003\178\001\000\226@\145\160\160\001\000\179\001\000\179\160\160\001\003\179\001\000\227@\144\160\001\003\180\001\000\228\144\160\001\003\181\001\000\229\144\160\001\003\182\001\000\230\145\160\160\001\000\183\001\000\183\160\160\001\003\183\001\000\231@\144\160\001\003\184\001\000\232\144\160\001\003\185\001\000\233\144\160\001\003\186\001\000\234\145\160\160\001\000\187\001\000\187\160\160\001\003\187\001\000\235@\144\160\001\003\188\001\000\236\145\160\160\001\000\189\001\000\189\160\160\001\003\189\001\000\237@\144\160\001\003\190\001\000\238\144\160\001\003\191\001\000\239\144\160\001\003\192\001\000\240\144\160\001\003\193\001\000\241\144\160\001\003\194\001\000\242\144\160\001\003\195\001\000\243\144\160\001\003\196\001\000\244\144\160\001\003\197\001\000\245\144\160\001\003\198\001\000\246\144\160\001\003\199\001\000\247\144\160\001\003\200\001\000\248\144\160\001\003\201\001\000\249\144\160\001\003\202\001\000\250\144\160\001\003\203\001\000\251\144\160\001\003
\204\001\000\252\144\160\001\003\205\001\000\253\144\160\001\003\206\001\000\254@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88598_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002!\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\000\255\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\215\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\247\001\000\187\001\000\188\001\000\189\001\000\190\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \023\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\000\255\001 \014\001 \015\000\255" 0 : int array);;
+let iso88598_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\149\000\000\000\000\000\000\005]\000\000\005]\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\223@\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|
\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@@@@@@@@@@@@@
\144\160\001\005\208\001\000\224\144\160\001\005\209\001\000\225\144\160\001\005\210\001\000\226\144\160\001\005\211\001\000\227\144\160\001\005\212\001\000\228\144\160\001\005\213\001\000\229\144\160\001\005\214\001\000\230\145\160\160\001\000\215\001\000\170\160\160\001\005\215\001\000\231@\144\160\001\005\216\001\000\232\144\160\001\005\217\001\000\233\144\160\001\005\218\001\000\234\144\160\001\005\219\001\000\235\144\160\001\005\220\001\000\236\144\160\001\005\221\001\000\237\144\160\001\005\222\001\000\238\144\160\001\005\223\001\000\239\144\160\001\005\224\001\000\240\144\160\001\005\225\001\000\241\144\160\001\005\226\001\000\242\144\160\001\005\227\001\000\243\144\160\001\005\228\001\000\244\144\160\001\005\229\001\000\245\144\160\001\005\230\001\000\246\144\160\001\005\231\001\000\247\144\160\001\005\232\001\000\248\144\160\001\005\233\001\000\249\144\160\001\005\234\001\000\250@@@@@@@@@@@@\144\160\001\000\247\001\000\186@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88599_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001\030\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\0010\001\001^\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001\031\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\2
49\001\000\250\001\000\251\001\000\252\001\0011\001\001_\001\000\255" 0 : int array);;
+let iso88599_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000
v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\1
44\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\0
00\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ Hashtbl.add Netmappings.to_unicode `Enc_iso88599 iso88599_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88599 iso88599_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88598 iso88598_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88598 iso88598_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88597 iso88597_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88597 iso88597_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88596 iso88596_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88596 iso88596_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88595 iso88595_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88595 iso88595_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88594 iso88594_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88594 iso88594_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88593 iso88593_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88593 iso88593_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88592 iso88592_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88592 iso88592_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885915 iso885915_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885915 iso885915_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885914 iso885914_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885914 iso885914_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885913 iso885913_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885913 iso885913_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885910 iso885910_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885910 iso885910_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88591 iso88591_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88591 iso88591_from_unicode;
+();;
diff --git a/helm/DEVEL/pxp/netstring/netmappings_other.ml b/helm/DEVEL/pxp/netstring/netmappings_other.ml
new file mode 100644
index 000000000..57fcb485d
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netmappings_other.ml
@@ -0,0 +1,154 @@
+(* WARNING! This is a generated file! *)
+let cp037_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\001\000\162n|hk\000|f\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223adji{\001\000\172mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstu
vwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp037_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001
\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\000_\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\
001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\001\000\236\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\001\000\252\144\160\001\000\221\001\000\173\144\160\001\000\222\001\000\174\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\000H\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W\144\160\001\000\240\001\000\140\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\204\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\16
0\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp1006_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\228\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\006\240\001\006\241\001\006\242\001\006\243\001\006\244\001\006\245\001\006\246\001\006\247\001\006\248\001\006\249\001\006\012\001\006\027\001\000\173\001\006\031\002\000\000\254\129\002\000\000\254\141\002\000\000\254\142\000\255\002\000\000\254\143\002\000\000\254\145\002\000\000\251V\002\000\000\251X\002\000\000\254\147\002\000\000\254\149\002\000\000\254\151\002\000\000\251f\002\000\000\251h\002\000\000\254\153\002\000\000\254\155\002\000\000\254\157\002\000\000\254\159\002\000\000\251z\002\000\000\251|\002\000\000\254\161\002\000\000\254\163\002\000\000\254\165\002\000\000\254\167\002\000\000\254\169\002\000\000\251\132\002\000\000\254\171\002\000\000\254\173\002\000\000\251\140\002\000\000\254\175\002\000\000\251\138\002\000\000\254\177\002\000\000\254\179\002\000\000\254\181\002\000\000\254\183\002\000\000\254\185\002\000\000\254\187\002\000\000\254\189\002\000\000\254\191\002\000\000\254\193\002\000\000\254\197\002\000\000\254\201\002\000\000\254\202\002\000\000\254\203\002\000\000\254\204\002\000\000\254\205\002\000\000\254\206\
002\000\000\254\207\002\000\000\254\208\002\000\000\254\209\002\000\000\254\211\002\000\000\254\213\002\000\000\254\215\002\000\000\254\217\002\000\000\254\219\002\000\000\251\146\002\000\000\251\148\002\000\000\254\221\002\000\000\254\223\002\000\000\254\224\002\000\000\254\225\002\000\000\254\227\002\000\000\251\158\002\000\000\254\229\002\000\000\254\231\002\000\000\254\133\002\000\000\254\237\002\000\000\251\166\002\000\000\251\168\002\000\000\251\169\002\000\000\251\170\002\000\000\254\128\002\000\000\254\137\002\000\000\254\138\002\000\000\254\139\002\000\000\254\241\002\000\000\254\242\002\000\000\254\243\002\000\000\251\176\002\000\000\251\174\002\000\000\254|\002\000\000\254}" 0 : int array);;
+let cp1006_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\143\000\000\000\000\000\000\006\146\000\000\006\146\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\006\012\001\000\171@\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\145\160\160[[\160\160\001\006\027\001\000\172@\144\160\\\\\144\160]]\144\160^^\145\160\160__\160\160\001\006\031\001\000\174@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\002\000\000\251V\001\000\181@\144\160\000W\000W\145\160\160\000X\000X\160\160\002\000\000\251X\001\000\182@\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\002\000\000\251f\001\000\186@\144\160\000g\000g\145\160\160\000h\000h\160\160\002\000\000\251h\001\000\187@\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000
r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\145\160\160\000z\000z\160\160\002\000\000\251z\001\000\192@\144\160\000{\000{\145\160\160\000|\000|\160\160\002\000\000\251|\001\000\193\160\160\002\000\000\254|\001\000\254@\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\255@\144\160\000~\000~\144\160\000\127\000\127\145\160\160\001\000\128\001\000\128\160\160\002\000\000\254\128\001\000\245@\145\160\160\001\000\129\001\000\129\160\160\002\000\000\254\129\001\000\175@\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\145\160\160\001\000\132\001\000\132\160\160\002\000\000\251\132\001\000\199@\145\160\160\001\000\133\001\000\133\160\160\002\000\000\254\133\001\000\239@\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\145\160\160\001\000\137\001\000\137\160\160\002\000\000\254\137\001\000\246@\145\160\160\001\000\138\001\000\138\160\160\002\000\000\251\138\001\000\204\160\160\002\000\000\254\138\001\000\247@\145\160\160\001\000\139\001\000\139\160\160\002\000\000\254\139\001\000\248@\145\160\160\001\000\140\001\000\140\160\160\002\000\000\251\140\001\000\202@\145\160\160\001\000\141\001\000\141\160\160\002\000\000\254\141\001\000\176@\145\160\160\001\000\142\001\000\142\160\160\002\000\000\254\142\001\000\177@\145\160\160\001\000\143\001\000\143\160\160\002\000\000\254\143\001\000\179@\144\160\001\000\144\001\000\144\145\160\160\001\000\145\001\000\145\160\160\002\000\000\254\145\001\000\180@\145\160\160\001\000\146\001\000\146\160\160\002\000\000\251\146\001\000\229@\145\160\160\001\000\147\001\000\147\160\160\002\000\000\254\147\001\000\183@\145\160\160\001\000\148\001\000\148\160\160\002\000\000\251\148\001\000\230@\145\160\160\001\000\149\001\000\149\160\160\002\000\000\254\149\001\000\184@\144\160\001\000\150\001\000\150\145\160\160\001\000\151\001\000\151\160\160\002\000\000\254\151\001\000\185@\144\160\001\000\152\00
1\000\152\145\160\160\001\000\153\001\000\153\160\160\002\000\000\254\153\001\000\188@\144\160\001\000\154\001\000\154\145\160\160\001\000\155\001\000\155\160\160\002\000\000\254\155\001\000\189@\144\160\001\000\156\001\000\156\145\160\160\001\000\157\001\000\157\160\160\002\000\000\254\157\001\000\190@\145\160\160\001\000\158\001\000\158\160\160\002\000\000\251\158\001\000\236@\145\160\160\001\000\159\001\000\159\160\160\002\000\000\254\159\001\000\191@\144\160\001\000\160\001\000\160\144\160\002\000\000\254\161\001\000\194@\144\160\002\000\000\254\163\001\000\195@\144\160\002\000\000\254\165\001\000\196\144\160\002\000\000\251\166\001\000\241\144\160\002\000\000\254\167\001\000\197\144\160\002\000\000\251\168\001\000\242\145\160\160\002\000\000\254\169\001\000\198\160\160\002\000\000\251\169\001\000\243@\144\160\002\000\000\251\170\001\000\244\144\160\002\000\000\254\171\001\000\200@\145\160\160\001\000\173\001\000\173\160\160\002\000\000\254\173\001\000\201@\144\160\002\000\000\251\174\001\000\253\144\160\002\000\000\254\175\001\000\203\144\160\002\000\000\251\176\001\000\252\144\160\002\000\000\254\177\001\000\205@\144\160\002\000\000\254\179\001\000\206@\144\160\002\000\000\254\181\001\000\207@\144\160\002\000\000\254\183\001\000\208@\144\160\002\000\000\254\185\001\000\209@\144\160\002\000\000\254\187\001\000\210@\144\160\002\000\000\254\189\001\000\211@\144\160\002\000\000\254\191\001\000\212@\144\160\002\000\000\254\193\001\000\213@@@\144\160\002\000\000\254\197\001\000\214@@@\144\160\002\000\000\254\201\001\000\215\144\160\002\000\000\254\202\001\000\216\144\160\002\000\000\254\203\001\000\217\144\160\002\000\000\254\204\001\000\218\144\160\002\000\000\254\205\001\000\219\144\160\002\000\000\254\206\001\000\220\144\160\002\000\000\254\207\001\000\221\144\160\002\000\000\254\208\001\000\222\144\160\002\000\000\254\209\001\000\223@\144\160\002\000\000\254\211\001\000\224@\144\160\002\000\000\254\213\001\000\225@\144\160\002\000\000\254\215\001\000\226@\144\16
0\002\000\000\254\217\001\000\227@\144\160\002\000\000\254\219\001\000\228@\144\160\002\000\000\254\221\001\000\231@\144\160\002\000\000\254\223\001\000\232\144\160\002\000\000\254\224\001\000\233\144\160\002\000\000\254\225\001\000\234@\144\160\002\000\000\254\227\001\000\235@\144\160\002\000\000\254\229\001\000\237@\144\160\002\000\000\254\231\001\000\238@@@@@\144\160\002\000\000\254\237\001\000\240@@\144\160\001\006\240\001\000\161\145\160\160\001\006\241\001\000\162\160\160\002\000\000\254\241\001\000\249@\145\160\160\001\006\242\001\000\163\160\160\002\000\000\254\242\001\000\250@\145\160\160\001\006\243\001\000\164\160\160\002\000\000\254\243\001\000\251@\144\160\001\006\244\001\000\165\144\160\001\006\245\001\000\166\144\160\001\006\246\001\000\167\144\160\001\006\247\001\000\168\144\160\001\006\248\001\000\169\144\160\001\006\249\001\000\170@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp1026_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\000{\001\000\241\001\000\199n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\001\001\030\001\0010ji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\000[\001\000\209\001\001_le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\0011z\001\000\214\001\001^g}\001\000\220\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000}\000`\001\000\166\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\001\000\246\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\000]d\000@\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\001\000\231\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\000~\001\000\242\001\000\243\001\000\245\001\001\031\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\000\\\001\000\249\001\000\250\001\000\255\001\000\252\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212c\001\000\210\001\000\211\001\000\213pqrst
uvwxy\001\000\179\001\000\219b\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp1026_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\000Z@\145\160\160__\160\160\001\001\031\001\000\208@\144\160`\000@\144\160a\000O\144\160b\001\000\252\144\160c\001\000\236\144\160d\001\000\173\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\145\160\160\001\0010\000[\160\160p\001\000\240@\145\160\160\001\0011\000y\160\160q\001\000\241@\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\001\000\174\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000h\144\160\000\\\001\000\220\144\160\000]\001\000\172\145\160\160\000^\000_\160\160\001\001^\000|@\145\160\160\001\001_\000j\160\160\000_\000m@\144\160\000`\001\000\1
41\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\000H\144\160\000|\001\000\187\144\160\000}\001\000\140\144\160\000~\001\000\204\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\001\000\142\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\001\000\186\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\1
77\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000J\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w@\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\000{\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\000\127@@\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\001\000\192\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W@\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\161\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000
\251\001\000\219\144\160\001\000\252\001\000\224@@\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp424_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\000\162n|hk\000|f\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225adji{\001\000\172mo\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\000\166le\000_~\127\000\255\001\005\234\000\255\000\255\001\000\160\000\255\000\255\000\255\001 \023\000`zc\000@g}b\000\255\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000\255\000\255\000\255\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\000\255\000\255\000\255\001\000\184\000\255\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\000\255\000\255\000\255\000\255\000\255\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\000\255\000\255\000\255\000\255\000\255\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\000\255\000\255\000\255\000\255\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\000\255\000\255\000\255\000\255\000\255pqrstuvwxy\001\000\179\000\255\000\255\000\255\000\255\001\000\159" 0 : int array);;
+let cp424_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\135\000\000\000\000\000\000\005K\000\000\005K\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\145\160\160Wf\160\160\001 \023\000x@\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000
\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000t@\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180@\144\160\001\000\171\001\000\138\144\160\001\000\172\000_\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\1
85\001\000\218@\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185@@@@@@@@@@@@@@@@@\144\160\001\005\208\000A\144\160\001\005\209\000B\144\160\001\005\210\000C\144\160\001\005\211\000D\144\160\001\005\212\000E\144\160\001\005\213\000F\144\160\001\005\214\000G\145\160\160\001\005\215\000H\160\160\001\000\215\001\000\191@\144\160\001\005\216\000I\144\160\001\005\217\000Q\144\160\001\005\218\000R\144\160\001\005\219\000S\144\160\001\005\220\000T\144\160\001\005\221\000U\144\160\001\005\222\000V\144\160\001\005\223\000W\144\160\001\005\224\000X\144\160\001\005\225\000Y\144\160\001\005\226\000b\144\160\001\005\227\000c\144\160\001\005\228\000d\144\160\001\005\229\000e\144\160\001\005\230\000f\144\160\001\005\231\000g\144\160\001\005\232\000h\144\160\001\005\233\000i\144\160\001\005\234\000q@@@@@@@@@@@@\144\160\001\000\247\001\000\225@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp437_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\162\001\000\163\001\000\165\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp437_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\18
4@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@\144\160\001\000\165\001\000\157\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@@@@@\144\160\001\000\209\001\000\165@@@@\144\160\001\000\214\001\000\153@@@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\1
36\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
+ let cp500_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\000[n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\000]dji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrst
uvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp500_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i
\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\001\000\187\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\001\000\186\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\
001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\001\000\236\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\001\000\252\144\160\001\000\221\001\000\173\144\160\001\000\222\001\000\174\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\000H\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W\144\160\001\000\240\001\000\140\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\204\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\16
0\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp737_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\201\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\134\001\003\136\001\003\137\001\003\138\001\003\140\001\003\142\001\003\143\001\000\177\001\"e\001\"d\001\003\170\001\003\171\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp737_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\216\000\000\006\216\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\
160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@\144\160\001\003\134\001\000\234@\145\160\160\001%\136\001\000\219\160\160\001\003\136\001\000\235@\144\160\001\003\137\001\000\236\144\160\001\003\138\001\000\237@\145\160\160\001%\140\001\000\221\160\160\001\003\140\001\000\238@@\144\160\001\003\142\001\000\239\144\160\001\003\143\001\000\240\144\160\001%\144\001\000\222\145\160\160\001\003\145\001\000\128\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\129\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\130\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\131\144\160\001\003\149\001\000\132\144\160\001\003\150\001\000\133\144\160\001\003\151\001\000\134\144\160\001\003\152\001\000\135\144\160\001\003\153\001\000\136\144\160\001\003\154\001\000\137\144\160\001\003\155\001\000\138\144\160\001\003\156\001\000\139\144\160\001\003\157\001\000\140\144\160\001\003\158\001\000\141\144\160\001\003\159\001\000\142\145\160\160\001\003\160\001\000\143\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\144@\144\160\001\003\163\001\000\145\144\160\001\003\164\001\000\146\144\160\001\003\165\001\000\147\144\160\001\003\166\001\000\148\144\160\001\003\167\001\000\149\144\160\001\003\168\001\000\150\144\160\001\003\169\001\000\151\144\160\001\003\170\001\000\244\144\160\001\003\171\001\000\245\144\160\001\003\172\001\000\225\144\160\001\003\173\001\000\226\144\160\001\003\174\001\000\227\144\160\001\003\175\001\000\229\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\152\160\160\001\000\177\001\000\241@\145\160\160\001\003\178\001\000\153\160\160\001\000\178\001\000\253@\144\160\001\003\179\001\000\154\144\160\001\003\180\001\000\155\144\160\001\003\181\001\000\156\144\160\001\003\182\001\000\157\145\160\160\001\003\183\001\000\158\160\160\001\000\183\001\000\250@\144\160\001\003\184\001\000\159\144\160\001\003\185\001\000\160\144\160\001\003\186\001\000\161\144\
160\001\003\187\001\000\162\144\160\001\003\188\001\000\163\144\160\001\003\189\001\000\164\144\160\001\003\190\001\000\165\144\160\001\003\191\001\000\166\144\160\001\003\192\001\000\167\144\160\001\003\193\001\000\168\144\160\001\003\194\001\000\170\144\160\001\003\195\001\000\169\144\160\001\003\196\001\000\171\144\160\001\003\197\001\000\172\144\160\001\003\198\001\000\173\144\160\001\003\199\001\000\174\144\160\001\003\200\001\000\175\144\160\001\003\201\001\000\224\144\160\001\003\202\001\000\228\144\160\001\003\203\001\000\232\144\160\001\003\204\001\000\230\144\160\001\003\205\001\000\231\144\160\001\003\206\001\000\233@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp775_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\001\006\001\000\252\001\000\233\001\001\001\001\000\228\001\001#\001\000\229\001\001\007\001\001B\001\001\019\001\001V\001\001W\001\001+\001\001y\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\001M\001\000\246\001\001\"\001\000\162\001\001Z\001\001[\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\000\164\001\001\000\001\001*\001\000\243\001\001{\001\001|\001\001z\001 \029\001\000\166\001\000\169\001\000\174\001\000\172\001\000\189\001\000\188\001\001A\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\001\004\001\001\012\001\001\024\001\001\022\001%c\001%Q\001%W\001%]\001\001.\001\001`\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001r\001\001j\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\001}\001\001\005\001\001\013\001\001\025\001\001\023\001\001/\001\001a\001\001s\001\001k\001\001~\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\000\211\001\000\223\001\001L\001\001C\001\000\245\001\000\213\001\000\181\001\001D\001\0016\001\0017\001\001;\001\001<\001\001F\001\001\018\001\001E\001 \025\001\000\173\001\000\177\001 \028\001\000\190\001\000\182\001\000\167\001\000\247\001 \030\001\000\176\001\"\025\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp775_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007U\000\000\000\000\000\000\007\019\000\000\007\019\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\160\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\001\001\001\000\131@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\001\004\001\000\181@\145\160\160EE\160\160\001\001\005\001\000\208@\145\160\160FF\160\160\001\001\006\001\000\128@\145\160\160GG\160\160\001\001\007\001\000\135@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\182\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\209@\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\237@\145\160\160SS\160\160\001\001\019\001\000\137@\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\184@\145\160\160WW\160\160\001\001\023\001\000\211@\145\160\160XX\160\160\001\001\024\001\000\183\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\210\160\160\001 \025\001\000\239\160\160\001\"\025\001\000\249@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001 \028\001\000\242@\145\160\160]]\160\160\001 \029\001\000\166@\145\160\160^^\160\160\001 
\030\001\000\247@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\149@\145\160\160cc\160\160\001\001#\001\000\133@\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\161@\145\160\160kk\160\160\001\001+\001\000\140@\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\145\160\160nn\160\160\001\001.\001\000\189@\145\160\160oo\160\160\001\001/\001\000\212@\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\145\160\160vv\160\160\001\0016\001\000\232@\145\160\160ww\160\160\001\0017\001\000\233@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\234@\145\160\160||\160\160\001%<\001\000\197\160\160\001\001<\001\000\235@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\173@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\160\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\231@\145\160\160\000E\000E\160\160\001\001E\001\000\238@\145\160\160\000F\000F\160\160\001\001F\001\000\236@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\226@\145\160\160\000M\000M\160\160\001\001M\001\000\147@\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\138@\145\160\160\000W\000W\160\160\001\001W\001\000\139\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\16
0\160\001\001`\001\000\190\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001\001a\001\000\213@\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001\001j\001\000\199@\145\160\160\000k\000k\160\160\001\001k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\198@\145\160\160\000s\000s\160\160\001\001s\001\000\214@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\165@\145\160\160\000{\000{\160\160\001\001{\001\000\163@\145\160\160\000|\000|\160\160\001\001|\001\000\164@\145\160\160\000}\000}\160\160\001\001}\001\000\207@\145\160\160\000~\000~\160\160\001\001~\001\000\216@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\150\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\159@\144\160\001\000\166\001\000\167\144\160\001\000\167\001\000\245@\144\160\001\000\169\001\000\168@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169@\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252@\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250@\144\160\001
\000\185\001\000\251@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243@@@@@\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146@@\144\160\001\000\201\001\000\144@@@@@@@@@\144\160\001\000\211\001\000\224@\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\000\216\001\000\157@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225@@@@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145@@\144\160\001\000\233\001\000\130@@@@@@@@@\144\160\001\000\243\001\000\162@\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@@@\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
+ let cp850_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001\000\174\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\000\192\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\000\240\001\000\208\001\000\202\001\000\203\001\000\200\001\0011\001\000\205\001\000\206\001\000\207\001%\024\001%\012\001%\136\001%\132\001\000\166\001\000\204\001%\128\001\000\211\001\000\223\001\000\212\001\000\210\001\000\245\001\000\213\001\000\181\001\000\254\001\000\222\001\000\218\001\000\219\001\000\217\001\000\253\001\000\221\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp850_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\211\000\000\000\000\000\000\006i\000\000\006i\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\145\160\160qq\160\160\001\0011\001\000\213@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\16
0\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243\144\160\00
1\000\191\001\000\168\144\160\001\000\192\001\000\183\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182\144\160\001\000\195\001\000\199\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\212\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\210\144\160\001\000\203\001\000\211\144\160\001\000\204\001\000\222\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215\144\160\001\000\207\001\000\216\144\160\001\000\208\001\000\209\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\227\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\000\216\001\000\157\144\160\001\000\217\001\000\235\144\160\001\000\218\001\000\233\144\160\001\000\219\001\000\234\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\237\144\160\001\000\222\001\000\232\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\198\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139\144\160\001\000\240\001\000\208\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\00
0\236\144\160\001\000\254\001\000\231\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
+ let cp852_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\001o\001\001\007\001\000\231\001\001B\001\000\235\001\001P\001\001Q\001\000\238\001\001y\001\000\196\001\001\006\001\000\201\001\0019\001\001:\001\000\244\001\000\246\001\001=\001\001>\001\001Z\001\001[\001\000\214\001\000\220\001\001d\001\001e\001\001A\001\000\215\001\001\013\001\000\225\001\000\237\001\000\243\001\000\250\001\001\004\001\001\005\001\001}\001\001~\001\001\024\001\001\025\001\000\172\001\001z\001\001\012\001\001_\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\001\026\001\001^\001%c\001%Q\001%W\001%]\001\001{\001\001|\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001\002\001\001\003\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\001\017\001\001\016\001\001\014\001\000\203\001\001\015\001\001G\001\000\205\001\000\206\001\001\027\001%\024\001%\012\001%\136\001%\132\001\001b\001\001n\001%\128\001\000\211\001\000\223\001\000\212\001\001C\001\001D\001\001H\001\001`\001\001a\001\001T\001\000\218\001\001U\001\001p\001\000\253\001\000\221\001\001c\001\000\180\001\000\173\001\002\221\001\002\219\001\002\199\001\002\216\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\002\217\001\001q\001\001X\001\001Y\001%\160\001\000\160" 0 : int array);;
+let cp852_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007X\000\000\000\000\000\000\007\023\000\000\007\023\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179\160\160\001\001\002\001\000\198@\145\160\160CC\160\160\001\001\003\001\000\199@\145\160\160DD\160\160\001\001\004\001\000\164@\145\160\160EE\160\160\001\001\005\001\000\165@\145\160\160FF\160\160\001\001\006\001\000\143@\145\160\160GG\160\160\001\001\007\001\000\134@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\172\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\159@\145\160\160NN\160\160\001\001\014\001\000\210@\145\160\160OO\160\160\001\001\015\001\000\212@\145\160\160PP\160\160\001%\016\001\000\191\160\160\001\001\016\001\000\209@\145\160\160QQ\160\160\001\001\017\001\000\208@\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001\001\024\001\000\168\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\169@\145\160\160ZZ\160\160\001\001\026\001\000\183@\145\160\160[[\160\160\001\001\027\001\000\216@\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\145@\145\160\160zz\160\160\001\001:\001\000\146@\144\160{{\145\160\160||\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\001=\001\000\149@\145\160\160~~\160\160\001\001>\001\000\150@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\157@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\1
60\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\228@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\213@\145\160\160\000H\000H\160\160\001\001H\001\000\229@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\138\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\139\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\001T\001\000\232@\145\160\160\000U\000U\160\160\001\001U\001\000\234@\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\001X\001\000\252@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\253@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\184@\145\160\160\000_\000_\160\160\001\001_\001\000\173@\145\160\160\000`\000`\160\160\001%`\001\000\204\160\160\001\001`\001\000\230@\145\160\160\000a\000a\160\160\001\001a\001\000\231@\145\160\160\000b\000b\160\160\001\001b\001\000\221@\145\160\160\000c\000c\160\160\001%c\001\000\185\160\160\001\001c\001\000\238@\145\160\160\000d\000d\160\160\001\001d\001\000\155@\145\160\160\000e\000e\160\160\001\001e\001\000\156@\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\222@\145\160\160\000o\000o\160\160\001\001o\001\000\133@\145\160\160\000p\000p\160\160\001\001p\001\000\235@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000
s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\171@\145\160\160\000{\000{\160\160\001\001{\001\000\189@\145\160\160\000|\000|\160\160\001\001|\001\000\190@\145\160\160\000}\000}\160\160\001\001}\001\000\166@\145\160\160\000~\000~\160\160\001\001~\001\000\167@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249@@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240@@\144\160\001\000\176\001\000\248@@@\144\160\001\000\180\001\000\239@@@\144\160\001\000\184\001\000\247@@\144\160\001\000\187\001\000\175@@@@@\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182@\144\160\001\000\196\001\000\142@@\145\160\160\001\000\199\001\000\128\160\160\001\002\199\001\000\243@@\144\160\001\000\201\001\000\144@\144\160\001\000\203\001\000\211@\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215@@@@\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226@\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\002\216\001\000\244\144\160\001\002\217\001\000\250\144\160\001\000\218\001\000\233\144\160\001\002\219\001\000\242\144\160\001\000\220\001\000\154\145\160\160\001\000\221\001\000\237\160\160\001\002\221\001\000\241@@\144\160\001\000\223\001\000\225@\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132@@\144\160\001\000\231\001\000\135@\144\160\001\000\233\001\000\130@\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140@@@@\144
\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\236@@" 0 : Netmappings.from_uni_list array);;
+ let cp855_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004R\001\004\002\001\004S\001\004\003\001\004Q\001\004\001\001\004T\001\004\004\001\004U\001\004\005\001\004V\001\004\006\001\004W\001\004\007\001\004X\001\004\008\001\004Y\001\004\t\001\004Z\001\004\n\001\004[\001\004\011\001\004\\\001\004\012\001\004^\001\004\014\001\004_\001\004\015\001\004N\001\004.\001\004J\001\004*\001\0040\001\004\016\001\0041\001\004\017\001\004F\001\004&\001\0044\001\004\020\001\0045\001\004\021\001\004D\001\004$\001\0043\001\004\019\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\004E\001\004%\001\0048\001\004\024\001%c\001%Q\001%W\001%]\001\0049\001\004\025\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\004:\001\004\026\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\004;\001\004\027\001\004<\001\004\028\001\004=\001\004\029\001\004>\001\004\030\001\004?\001%\024\001%\012\001%\136\001%\132\001\004\031\001\004O\001%\128\001\004/\001\004@\001\004 \001\004A\001\004!\001\004B\001\004\"\001\004C\001\004#\001\0046\001\004\022\001\0042\001\004\018\001\004L\001\004,\001!\022\001\000\173\001\004K\001\004+\001\0047\001\004\023\001\004H\001\004(\001\004M\001\004-\001\004I\001\004)\001\004G\001\004'\001\000\167\001%\160\001\000\160" 0 : int array);;
+let cp855_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\195\000\000\000\000\000\000\007\164\000\000\007\164\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\004\001\001\000\133@\145\160\160BB\160\160\001\004\002\001\000\129\160\160\001%\002\001\000\179@\145\160\160CC\160\160\001\004\003\001\000\131@\145\160\160DD\160\160\001\004\004\001\000\135@\145\160\160EE\160\160\001\004\005\001\000\137@\145\160\160FF\160\160\001\004\006\001\000\139@\145\160\160GG\160\160\001\004\007\001\000\141@\145\160\160HH\160\160\001\004\008\001\000\143@\145\160\160II\160\160\001\004\t\001\000\145@\145\160\160JJ\160\160\001\004\n\001\000\147@\145\160\160KK\160\160\001\004\011\001\000\149@\145\160\160LL\160\160\001\004\012\001\000\151\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\153@\145\160\160OO\160\160\001\004\015\001\000\155@\145\160\160PP\160\160\001\004\016\001\000\161\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\163@\145\160\160RR\160\160\001\004\018\001\000\236@\145\160\160SS\160\160\001\004\019\001\000\173@\145\160\160TT\160\160\001\004\020\001\000\167\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\169@\145\160\160VV\160\160\001\004\022\001\000\234\160\160\001!\022\001\000\239@\145\160\160WW\160\160\001\004\023\001\000\244@\145\160\160XX\160\160\001\004\024\001\000\184\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\190@\145\160\160ZZ\160\160\001\004\026\001\000\199@\145\160\160[[\160\160\001\004\027\001\000\209@\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001\004\028\001\000\211@\145\160\160]]\160\160\001\004\029\001\000\213@\145\160\160^^\160\160\001\004\030\001\000\215@\145\160\160__\160\160\001\004\031\001\000\221@\145\160\160``\160\160\001\004 
\001\000\226@\145\160\160aa\160\160\001\004!\001\000\228@\145\160\160bb\160\160\001\004\"\001\000\230@\145\160\160cc\160\160\001\004#\001\000\232@\145\160\160dd\160\160\001\004$\001\000\171\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\182@\145\160\160ff\160\160\001\004&\001\000\165@\145\160\160gg\160\160\001\004'\001\000\252@\145\160\160hh\160\160\001\004(\001\000\246@\145\160\160ii\160\160\001\004)\001\000\250@\145\160\160jj\160\160\001\004*\001\000\159@\145\160\160kk\160\160\001\004+\001\000\242@\145\160\160ll\160\160\001%,\001\000\194\160\160\001\004,\001\000\238@\145\160\160mm\160\160\001\004-\001\000\248@\145\160\160nn\160\160\001\004.\001\000\157@\145\160\160oo\160\160\001\004/\001\000\224@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\162@\145\160\160rr\160\160\001\0042\001\000\235@\145\160\160ss\160\160\001\0043\001\000\172@\145\160\160tt\160\160\001\0044\001\000\166\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\168@\145\160\160vv\160\160\001\0046\001\000\233@\145\160\160ww\160\160\001\0047\001\000\243@\145\160\160xx\160\160\001\0048\001\000\183@\145\160\160yy\160\160\001\0049\001\000\189@\145\160\160zz\160\160\001\004:\001\000\198@\145\160\160{{\160\160\001\004;\001\000\208@\145\160\160||\160\160\001%<\001\000\197\160\160\001\004<\001\000\210@\145\160\160}}\160\160\001\004=\001\000\212@\145\160\160~~\160\160\001\004>\001\000\214@\145\160\160\127\127\160\160\001\004?\001\000\216@\145\160\160\000@\000@\160\160\001\004@\001\000\225@\145\160\160\000A\000A\160\160\001\004A\001\000\227@\145\160\160\000B\000B\160\160\001\004B\001\000\229@\145\160\160\000C\000C\160\160\001\004C\001\000\231@\145\160\160\000D\000D\160\160\001\004D\001\000\170@\145\160\160\000E\000E\160\160\001\004E\001\000\181@\145\160\160\000F\000F\160\160\001\004F\001\000\164@\145\160\160\000G\000G\160\160\001\004G\001\000\251@\145\160\160\000H\000H\160\160\001\004H\001\000\245@\145\160\160\000I\000I\160\160\001\004I\001\
000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\158@\145\160\160\000K\000K\160\160\001\004K\001\000\241@\145\160\160\000L\000L\160\160\001\004L\001\000\237@\145\160\160\000M\000M\160\160\001\004M\001\000\247@\145\160\160\000N\000N\160\160\001\004N\001\000\156@\145\160\160\000O\000O\160\160\001\004O\001\000\222@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\004Q\001\000\132\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001\004R\001\000\128@\145\160\160\000S\000S\160\160\001\004S\001\000\130@\145\160\160\000T\000T\160\160\001\004T\001\000\134\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001\004U\001\000\136@\145\160\160\000V\000V\160\160\001\004V\001\000\138@\145\160\160\000W\000W\160\160\001\004W\001\000\140\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\004X\001\000\142@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\144@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\146\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\004[\001\000\148@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\150@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\004^\001\000\152@\145\160\160\000_\000_\160\160\001\004_\001\000\154@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\0
01%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\253@@@\144\160\001\000\171\001\000\174@\144\160\001\000\173\001\000\240@@@@@@@@@@@@@\144\160\001\000\187\001\000\175@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp856_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\028\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\001\000\163\000\255\001\000\215\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\174\001\000\172\001\000\189\001\000\188\000\255\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\000\255\000\255\000\255\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\000\255\000\255\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001%\024\001%\012\001%\136\001%\132\001\000\166\000\255\001%\128\000\255\000\255\000\255\000\255\000\255\000\255\001\000\181\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp856_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\177\000\000\000\000\000\000\005\152\000\000\005\152\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\00
1\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243@@@@@@@@@@@@@@@@@\144\160\001\005\208\001\000\128\144\160\001\005\209\001\000\129\144\160\001\005\210\001\000\130\144\160\001\005\211\001\000\131\144\160\001\005\212\001
\000\132\144\160\001\005\213\001\000\133\144\160\001\005\214\001\000\134\145\160\160\001\005\215\001\000\135\160\160\001\000\215\001\000\158@\144\160\001\005\216\001\000\136\144\160\001\005\217\001\000\137\144\160\001\005\218\001\000\138\144\160\001\005\219\001\000\139\144\160\001\005\220\001\000\140\144\160\001\005\221\001\000\141\144\160\001\005\222\001\000\142\144\160\001\005\223\001\000\143\144\160\001\005\224\001\000\144\144\160\001\005\225\001\000\145\144\160\001\005\226\001\000\146\144\160\001\005\227\001\000\147\144\160\001\005\228\001\000\148\144\160\001\005\229\001\000\149\144\160\001\005\230\001\000\150\144\160\001\005\231\001\000\151\144\160\001\005\232\001\000\152\144\160\001\005\233\001\000\153\144\160\001\005\234\001\000\154@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp857_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002B\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\0011\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\0010\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\001^\001\001_\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\001\030\001\001\031\001\000\191\001\000\174\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\000\192\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\000\186\001\000\170\001\000\202\001\000\203\001\000\200\000\255\001\000\205\001\000\206\001\000\207\001%\024\001%\012\001%\136\001%\132\001\000\166\001\000\204\001%\128\001\000\211\001\000\223\001\000\212\001\000\210\001\000\245\001\000\213\001\000\181\000\255\001\000\215\001\000\218\001\000\219\001\000\217\001\000\236\001\000\255\001\000\175\001\000\180\001\000\173\001\000\177\000\255\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp857_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\199\000\000\000\000\000\000\006f\000\000\006f\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\001\030\001\000\166@\145\160\160__\160\160\001\001\031\001\000\167@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\152@\145\160\160qq\160\160\001\0011\001\000\141@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]
\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\158@\145\160\160\000_\000_\160\160\001\001_\001\000\159@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184\144\160\001\000\170\001\000\209\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251\144\160\001\000\186\001\000\208\144\160\001\000\187\001\000\175\1
44\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243\144\160\001\000\191\001\000\168\144\160\001\000\192\001\000\183\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182\144\160\001\000\195\001\000\199\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\212\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\210\144\160\001\000\203\001\000\211\144\160\001\000\204\001\000\222\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215\144\160\001\000\207\001\000\216@\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\227\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\232\144\160\001\000\216\001\000\157\144\160\001\000\217\001\000\235\144\160\001\000\218\001\000\233\144\160\001\000\219\001\000\234\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\198\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\237" 0 : 
Netmappings.from_uni_list array);;
+ let cp860_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\227\001\000\224\001\000\193\001\000\231\001\000\234\001\000\202\001\000\232\001\000\205\001\000\212\001\000\236\001\000\195\001\000\194\001\000\201\001\000\192\001\000\200\001\000\244\001\000\245\001\000\242\001\000\218\001\000\249\001\000\204\001\000\213\001\000\220\001\000\162\001\000\163\001\000\217\001 \167\001\000\211\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001\000\210\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp860_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007-\000\000\000\000\000\000\006\224\000\000\006\224\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\
160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\145\160\160\001\000\192\001\000\145\160\160\001\003\192\001\000\227@\144\160\001\000\193\001\000\134\144\160\001\000\194\001\000\143\145\160\160\001\000\195\001\000\142\160\160\001\003\195\001\000\229@\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\146\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\137@\144\160\001\000\204\001\000\152\144\160\001\000\205\001\000\139@@@\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\169\144\160\001\000\211\001\000\159\144\160\001\000\212\001\000\140\144\160\001\000\213\001\000\153@@@\144\160\001\000\217\001\000\157\144\160\001\000\218\001\000\150@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\
000\226\001\000\131\144\160\001\000\227\001\000\132@@@\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136@\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161@@@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\148@\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
+ let cp861_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\208\001\000\240\001\000\222\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\254\001\000\251\001\000\221\001\000\253\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\193\001\000\205\001\000\211\001\000\218\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp861_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\18
4@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227\144\160\001\000\193\001\000\164@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@\144\160\001\000\205\001\000\165@@\144\160\001\000\208\001\000\139@@\144\160\001\000\211\001\000\166@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@\144\160\001\000\218\001\000\167@\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\151\144\160\001\000\222\001\000\141\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\
144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161@@\144\160\001\000\240\001\000\140@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\152\144\160\001\000\254\001\000\149@" 0 : Netmappings.from_uni_list array);;
+ let cp862_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\001\000\162\001\000\163\001\000\165\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp862_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0074\000\000\000\000\000\000\006\233\000\000\006\233\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\18
4@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@\144\160\001\000\165\001\000\157\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237@@@@@@@@@\144\160\001\005\208\001\000\128\145\160\160\001\005\209\001\000\129\160\160\001\000\209\001\000\165@\144\160\001\005\210\001\000\130\144\160\001\005\211\001\000\131\144\160\001\005\212\001\000\132\144\160\001\005\213\001\000\133\144\160\001\005\214\001\000\134\144\160\001\005\215\001\000\135\144\160\001\005\216\001\000\136\144\160\001\005\217\001\000\137\144\160\001\005\218\001\000\138\144\160\001\005\219\001\000\139\144\160\001\005\220\001\000\140\144\160\001\005\221\001\000\141\144\160\001\005\222\001\000\142\145\160\160\001\005\223\001\000\143\160\160\001\000\223\001\000\225@\144\160\001\005\224\001\000\144\145\160
\160\001\005\225\001\000\145\160\160\001\000\225\001\000\160@\144\160\001\005\226\001\000\146\144\160\001\005\227\001\000\147\144\160\001\005\228\001\000\148\144\160\001\005\229\001\000\149\144\160\001\005\230\001\000\150\144\160\001\005\231\001\000\151\144\160\001\005\232\001\000\152\144\160\001\005\233\001\000\153\144\160\001\005\234\001\000\154@@\144\160\001\000\237\001\000\161@@@\144\160\001\000\241\001\000\164@\144\160\001\000\243\001\000\162@@@\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp863_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\194\001\000\224\001\000\182\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001 \023\001\000\192\001\000\167\001\000\201\001\000\200\001\000\202\001\000\244\001\000\203\001\000\207\001\000\251\001\000\249\001\000\164\001\000\212\001\000\220\001\000\162\001\000\163\001\000\217\001\000\219\001\001\146\001\000\166\001\000\180\001\000\243\001\000\250\001\000\168\001\000\184\001\000\179\001\000\175\001\000\206\001#\016\001\000\172\001\000\189\001\000\188\001\000\190\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp863_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0077\000\000\000\000\000\000\006\237\000\000\006\237\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\141@\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\
000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@\144\160\001\000\164\001\000\152@\145\160\160\001\000\166\001\000\160\160\160\001\003\166\001\000\232@\144\160\001\000\167\001\000\143\144\160\001\000\168\001\000\164\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@\144\160\001\000\175\001\000\167\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\166\145\160\160\001\000\180\001\000\161\160\160\001\003\180\001\000\235@\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@\144\160\001\000\182\001\000\134\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\165@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\173@\145\160\160\001\000\192\001\000\142\160\160\001\003\192\001\000\227@@\144\160\001\000\194\001\000\132\144\160\001\003\195\001\000\229\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\145\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\146\144\160\001\000\203\001\000\148@@\144\160\001\000\206\001\000\168\144\160\001\000\207\001\000\149@@@@\144\160\001\000\212\001\000\153@@@@\144\160\001\000\217\001\000\157@\144\160\001\000\219\001\000\158\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\13
3@\144\160\001\000\226\001\000\131@@@@\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@@\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@@\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
+ let cp864_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\209\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcd\001\006jfghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\176\001\000\183\001\"\025\001\"\026\001%\146\001%\000\001%\002\001%<\001%$\001%,\001%\028\001%4\001%\016\001%\012\001%\020\001%\024\001\003\178\001\"\030\001\003\198\001\000\177\001\000\189\001\000\188\001\"H\001\000\171\001\000\187\002\000\000\254\247\002\000\000\254\248\000\255\000\255\002\000\000\254\251\002\000\000\254\252\000\255\001\000\160\001\000\173\002\000\000\254\130\001\000\163\001\000\164\002\000\000\254\132\000\255\000\255\002\000\000\254\142\002\000\000\254\143\002\000\000\254\149\002\000\000\254\153\001\006\012\002\000\000\254\157\002\000\000\254\161\002\000\000\254\165\001\006`\001\006a\001\006b\001\006c\001\006d\001\006e\001\006f\001\006g\001\006h\001\006i\002\000\000\254\209\001\006\027\002\000\000\254\177\002\000\000\254\181\002\000\000\254\185\001\006\031\001\000\162\002\000\000\254\128\002\000\000\254\129\002\000\000\254\131\002\000\000\254\133\002\000\000\254\202\002\000\000\254\139\002\000\000\254\141\002\000\000\254\145\002\000\000\254\147\002\000\000\254\151\002\000\000\254\155\002\000\000\254\159\002\000\000\254\163\002\000\000\254\167\002\000\000\254\169\002\000\000\254\171\002\000\000\254\173\002\000\000\254\175\002\000\000\254\179\002\000\000\254\183\002\000\000\254\187\002\000\000\254\191\002\000\000\254\193\002\000\000\254\197\002\000\000\254\203\002\000\000\254\207\001\000\166\001\000\172\001\000\247\001\000\215\002\000\000\254\201\001\006@\002\000\000\254\211\002\000\000\254\215\002
\000\000\254\219\002\000\000\254\223\002\000\000\254\227\002\000\000\254\231\002\000\000\254\235\002\000\000\254\237\002\000\000\254\239\002\000\000\254\243\002\000\000\254\189\002\000\000\254\204\002\000\000\254\206\002\000\000\254\205\002\000\000\254\225\002\000\000\254}\001\006Q\002\000\000\254\229\002\000\000\254\233\002\000\000\254\236\002\000\000\254\240\002\000\000\254\242\002\000\000\254\208\002\000\000\254\213\002\000\000\254\245\002\000\000\254\246\002\000\000\254\221\002\000\000\254\217\002\000\000\254\241\001%\160\000\255" 0 : int array);;
+let cp864_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007i\000\000\000\000\000\000\006\136\000\000\006\136\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\133@\144\160AA\145\160\160BB\160\160\001%\002\001\000\134@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\141\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\140@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\142@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\143@\145\160\160YY\160\160\001\"\025\001\000\130@\145\160\160ZZ\160\160\001\"\026\001\000\131@\145\160\160[[\160\160\001\006\027\001\000\187@\145\160\160\\\\\160\160\001%\028\001\000\138@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\145@\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\136@@\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\137@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\139@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\135@\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\150@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\
000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\006`\001\000\176@\145\160\160\000a\000a\160\160\001\006a\001\000\177@\145\160\160\000b\000b\160\160\001\006b\001\000\178@\145\160\160\000c\000c\160\160\001\006c\001\000\179@\145\160\160\000d\000d\160\160\001\006d\001\000\180@\145\160\160\000e\000e\160\160\001\006e\001\000\181@\145\160\160\000f\000f\160\160\001\006f\001\000\182@\145\160\160\000g\000g\160\160\001\006g\001\000\183@\145\160\160\000h\000h\160\160\001\006h\001\000\184@\145\160\160\000i\000i\160\160\001\006i\001\000\185@\145\160\160\001\006je\160\160\000j\000j@\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\240@\144\160\000~\000~\144\160\000\127\000\127\144\160\002\000\000\254\128\001\000\193\144\160\002\000\000\254\129\001\000\194\144\160\002\000\000\254\130\001\000\162\144\160\002\000\000\254\131\001\000\195\144\160\002\000\000\254\132\001\000\165\144\160\002\000\000\254\133\001\000\196@@@@@\144\160\002\000\000\254\139\001\000\198@\144\160\002\000\000\254\141\001\000\199\144\160\002\000\000\254\142\001\000\168\144\160\002\000\000\254\143\001\000\169@\144\160\002\000\000\254\145\001\000\200\144\160\001%\146\001\000\132\144\160\002\000\000\254\147\001\000\201@\144\160\002\000\000\254\149\001\000\170@\144\160\002\000\000\254\151\001\000\202@\144\160\002\000\000\254\153\001\000\171@\144\160\002\000\000\254\155\001\000\203@\144\160\002\000\000\254\157\001\000\173@\144\160\002\000\000\254\159\001\000\204\145\160\160\001\000\160\001\000\160\160\160\001%\160\001\000\254@\144\160\002\000\000\254\161\001\000\174\144\160\001\000\162\001\000\192\145\160\160\001\000\163\001\000\163\160\160\002\000\000\254\163\001\000\
205@\144\160\001\000\164\001\000\164\144\160\002\000\000\254\165\001\000\175\144\160\001\000\166\001\000\219\144\160\002\000\000\254\167\001\000\206@\144\160\002\000\000\254\169\001\000\207@\145\160\160\001\000\171\001\000\151\160\160\002\000\000\254\171\001\000\208@\144\160\001\000\172\001\000\220\145\160\160\001\000\173\001\000\161\160\160\002\000\000\254\173\001\000\209@@\144\160\002\000\000\254\175\001\000\210\144\160\001\000\176\001\000\128\145\160\160\001\000\177\001\000\147\160\160\002\000\000\254\177\001\000\188@\144\160\001\003\178\001\000\144\144\160\002\000\000\254\179\001\000\211@\144\160\002\000\000\254\181\001\000\189@\145\160\160\001\000\183\001\000\129\160\160\002\000\000\254\183\001\000\212@@\144\160\002\000\000\254\185\001\000\190@\145\160\160\001\000\187\001\000\152\160\160\002\000\000\254\187\001\000\213@\144\160\001\000\188\001\000\149\145\160\160\001\000\189\001\000\148\160\160\002\000\000\254\189\001\000\235@@\144\160\002\000\000\254\191\001\000\214@\144\160\002\000\000\254\193\001\000\215@@@\144\160\002\000\000\254\197\001\000\216\144\160\001\003\198\001\000\146@@\144\160\002\000\000\254\201\001\000\223\144\160\002\000\000\254\202\001\000\197\144\160\002\000\000\254\203\001\000\217\144\160\002\000\000\254\204\001\000\236\144\160\002\000\000\254\205\001\000\238\144\160\002\000\000\254\206\001\000\237\144\160\002\000\000\254\207\001\000\218\144\160\002\000\000\254\208\001\000\247\144\160\002\000\000\254\209\001\000\186@\144\160\002\000\000\254\211\001\000\225@\144\160\002\000\000\254\213\001\000\248@\145\160\160\001\000\215\001\000\222\160\160\002\000\000\254\215\001\000\226@@\144\160\002\000\000\254\217\001\000\252@\144\160\002\000\000\254\219\001\000\227@\144\160\002\000\000\254\221\001\000\251@\144\160\002\000\000\254\223\001\000\228@\144\160\002\000\000\254\225\001\000\239@\144\160\002\000\000\254\227\001\000\229@\144\160\002\000\000\254\229\001\000\242@\144\160\002\000\000\254\231\001\000\230@\144\160\002\000\000\254\233\001\000\243@\144\1
60\002\000\000\254\235\001\000\231\144\160\002\000\000\254\236\001\000\244\144\160\002\000\000\254\237\001\000\232@\144\160\002\000\000\254\239\001\000\233\144\160\002\000\000\254\240\001\000\245\144\160\002\000\000\254\241\001\000\253\144\160\002\000\000\254\242\001\000\246\144\160\002\000\000\254\243\001\000\234@\144\160\002\000\000\254\245\001\000\249\144\160\002\000\000\254\246\001\000\250\145\160\160\002\000\000\254\247\001\000\153\160\160\001\000\247\001\000\221@\144\160\002\000\000\254\248\001\000\154@@\144\160\002\000\000\254\251\001\000\157\144\160\002\000\000\254\252\001\000\158@@@" 0 : Netmappings.from_uni_list array);;
+ let cp865_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\164\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp865_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\18
4@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@\144\160\001\000\164\001\000\175@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167@\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@@@@@\144\160\001\000\209\001\000\165@@@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\
137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
+ let cp866_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001\004\001\001\004Q\001\004\004\001\004T\001\004\007\001\004W\001\004\014\001\004^\001\000\176\001\"\025\001\000\183\001\"\026\001!\022\001\000\164\001%\160\001\000\160" 0 : int array);;
+let cp866_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\195\000\000\000\000\000\000\007\164\000\000\007\164\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\004\001\001\000\240@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\004\004\001\000\242@\144\160EE\144\160FF\145\160\160GG\160\160\001\004\007\001\000\244@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\246@\144\160OO\145\160\160PP\160\160\001\004\016\001\000\128\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\129@\145\160\160RR\160\160\001\004\018\001\000\130@\145\160\160SS\160\160\001\004\019\001\000\131@\145\160\160TT\160\160\001\004\020\001\000\132\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\133@\145\160\160VV\160\160\001\004\022\001\000\134\160\160\001!\022\001\000\252@\145\160\160WW\160\160\001\004\023\001\000\135@\145\160\160XX\160\160\001\004\024\001\000\136\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\137\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\004\026\001\000\138\160\160\001\"\026\001\000\251@\145\160\160[[\160\160\001\004\027\001\000\139@\145\160\160\\\\\160\160\001\004\028\001\000\140\160\160\001%\028\001\000\195@\145\160\160]]\160\160\001\004\029\001\000\141@\145\160\160^^\160\160\001\004\030\001\000\142@\145\160\160__\160\160\001\004\031\001\000\143@\145\160\160``\160\160\001\004 
\001\000\144@\145\160\160aa\160\160\001\004!\001\000\145@\145\160\160bb\160\160\001\004\"\001\000\146@\145\160\160cc\160\160\001\004#\001\000\147@\145\160\160dd\160\160\001\004$\001\000\148\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\149@\145\160\160ff\160\160\001\004&\001\000\150@\145\160\160gg\160\160\001\004'\001\000\151@\145\160\160hh\160\160\001\004(\001\000\152@\145\160\160ii\160\160\001\004)\001\000\153@\145\160\160jj\160\160\001\004*\001\000\154@\145\160\160kk\160\160\001\004+\001\000\155@\145\160\160ll\160\160\001\004,\001\000\156\160\160\001%,\001\000\194@\145\160\160mm\160\160\001\004-\001\000\157@\145\160\160nn\160\160\001\004.\001\000\158@\145\160\160oo\160\160\001\004/\001\000\159@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\161@\145\160\160rr\160\160\001\0042\001\000\162@\145\160\160ss\160\160\001\0043\001\000\163@\145\160\160tt\160\160\001\0044\001\000\164\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\165@\145\160\160vv\160\160\001\0046\001\000\166@\145\160\160ww\160\160\001\0047\001\000\167@\145\160\160xx\160\160\001\0048\001\000\168@\145\160\160yy\160\160\001\0049\001\000\169@\145\160\160zz\160\160\001\004:\001\000\170@\145\160\160{{\160\160\001\004;\001\000\171@\145\160\160||\160\160\001\004<\001\000\172\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\004=\001\000\173@\145\160\160~~\160\160\001\004>\001\000\174@\145\160\160\127\127\160\160\001\004?\001\000\175@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\
000\233@\145\160\160\000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\004T\001\000\243@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187\160\160\001\004W\001\000\245@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198\160\160\001\004^\001\000\247@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209@\145\160\160\000e\000e\160\160\001%e\001\000\210@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\
000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\253@@@@@@@@@@@\144\160\001\000\176\001\000\248@@@@@@\144\160\001\000\183\001\000\250@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp869_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\000\255\000\255\000\255\000\255\000\255\000\255\001\003\134\000\255\001\000\183\001\000\172\001\000\166\001 \024\001 \025\001\003\136\001 \021\001\003\137\001\003\138\001\003\170\001\003\140\000\255\000\255\001\003\142\001\003\171\001\000\169\001\003\143\001\000\178\001\000\179\001\003\172\001\000\163\001\003\173\001\003\174\001\003\175\001\003\202\001\003\144\001\003\204\001\003\205\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\000\189\001\003\152\001\003\153\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\003\154\001\003\155\001\003\156\001\003\157\001%c\001%Q\001%W\001%]\001\003\158\001\003\159\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\003\160\001\003\161\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001%\024\001%\012\001%\136\001%\132\001\003\180\001\003\181\001%\128\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\132\001\000\173\001\000\177\001\003\197\001\003\198\001\003\199\001\000\167\001\003\200\001\003\133\001\000\176\001\000\168\001\003\201\001\003\203\001\003\176\001\003\206\001%\160\001\000\160" 0 : int array);;
+let cp869_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\207\000\000\000\000\000\000\006\138\000\000\006\138\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001 \021\001\000\142@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\139\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001 \025\001\000\140@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000
^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\145\160\160\001%\132\001\000\220\160\160\001\003\132\001\000\239@\144\160\001\003\133\001\000\247\144\160\001\003\134\001\000\134@\145\160\160\001\003\136\001\000\141\160\160\001%\136\001\000\219@\144\160\001\003\137\001\000\143\144\160\001\003\138\001\000\144@\144\160\001\003\140\001\000\146@\144\160\001\003\142\001\000\149\144\160\001\003\143\001\000\152\144\160\001\003\144\001\000\161\145\160\160\001\003\145\001\000\164\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\165\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\166\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\167\144\160\001\003\149\001\000\168\144\160\001\003\150\001\000\169\144\160\001\003\151\001\000\170\144\160\001\003\152\001\000\172\144\160\001\003\153\001\000\173\144\160\001\003\154\001\000\181\144\160\001\003\155\001\000\182\144\160\001\003\156\001\000\183\144\160\001\003\157\001\000\184\144\160\001\003\158\001\000\189\144\160\001\003\159\001\000\190\145\160\160\001\003\160\001\000\198\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\199@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\207@\144\160\001\003\164\001\000\208\1
44\160\001\003\165\001\000\209\145\160\160\001\000\166\001\000\138\160\160\001\003\166\001\000\210@\145\160\160\001\003\167\001\000\211\160\160\001\000\167\001\000\245@\145\160\160\001\003\168\001\000\212\160\160\001\000\168\001\000\249@\145\160\160\001\000\169\001\000\151\160\160\001\003\169\001\000\213@\144\160\001\003\170\001\000\145\145\160\160\001\003\171\001\000\150\160\160\001\000\171\001\000\174@\145\160\160\001\000\172\001\000\137\160\160\001\003\172\001\000\155@\145\160\160\001\003\173\001\000\157\160\160\001\000\173\001\000\240@\144\160\001\003\174\001\000\158\144\160\001\003\175\001\000\159\145\160\160\001\000\176\001\000\248\160\160\001\003\176\001\000\252@\145\160\160\001\003\177\001\000\214\160\160\001\000\177\001\000\241@\145\160\160\001\000\178\001\000\153\160\160\001\003\178\001\000\215@\145\160\160\001\000\179\001\000\154\160\160\001\003\179\001\000\216@\144\160\001\003\180\001\000\221\144\160\001\003\181\001\000\222\144\160\001\003\182\001\000\224\145\160\160\001\000\183\001\000\136\160\160\001\003\183\001\000\225@\144\160\001\003\184\001\000\226\144\160\001\003\185\001\000\227\144\160\001\003\186\001\000\228\145\160\160\001\000\187\001\000\175\160\160\001\003\187\001\000\229@\144\160\001\003\188\001\000\230\145\160\160\001\000\189\001\000\171\160\160\001\003\189\001\000\231@\144\160\001\003\190\001\000\232\144\160\001\003\191\001\000\233\144\160\001\003\192\001\000\234\144\160\001\003\193\001\000\235\144\160\001\003\194\001\000\237\144\160\001\003\195\001\000\236\144\160\001\003\196\001\000\238\144\160\001\003\197\001\000\242\144\160\001\003\198\001\000\243\144\160\001\003\199\001\000\244\144\160\001\003\200\001\000\246\144\160\001\003\201\001\000\250\144\160\001\003\202\001\000\160\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\162\144\160\001\003\205\001\000\163\144\160\001\003\206\001\000\253@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp874_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002&\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\000\255\000\255\000\255\001 &\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\160\001\014\001\001\014\002\001\014\003\001\014\004\001\014\005\001\014\006\001\014\007\001\014\008\001\014\t\001\014\n\001\014\011\001\014\012\001\014\013\001\014\014\001\014\015\001\014\016\001\014\017\001\014\018\001\014\019\001\014\020\001\014\021\001\014\022\001\014\023\001\014\024\001\014\025\001\014\026\001\014\027\001\014\028\001\014\029\001\014\030\001\014\031\001\014 \001\014!\001\014\"\001\014#\001\014$\001\014%\001\014&\001\014'\001\014(\001\014)\001\014*\001\014+\001\014,\001\014-\001\014.\001\014/\001\0140\001\0141\001\0142\001\0143\001\0144\001\0145\001\0146\001\0147\001\0148\001\0149\001\014:\000\255\000\255\000\255\000\255\001\014?\001\014@\001\014A\001\014B\001\014C\001\014D\001\014E\001\014F\001\014G\001\014H\001\014I\001\014J\001\014K\001\014L\001\014M\001\014N\001\014O\001\014P\001\014Q\001\014R\001\014S\001\014T\001\014U\001\014V\001\014W\001\014X\001\014Y\001\014Z\001\014[\000\255\000\255\000\255\000\255" 0 : int array);;
+let cp874_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\185\000\000\000\000\000\000\006\202\000\000\006\202\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\014\001\001\000\161@\145\160\160BB\160\160\001\014\002\001\000\162@\145\160\160CC\160\160\001\014\003\001\000\163@\145\160\160DD\160\160\001\014\004\001\000\164@\145\160\160EE\160\160\001\014\005\001\000\165@\145\160\160FF\160\160\001\014\006\001\000\166@\145\160\160GG\160\160\001\014\007\001\000\167@\145\160\160HH\160\160\001\014\008\001\000\168@\145\160\160II\160\160\001\014\t\001\000\169@\145\160\160JJ\160\160\001\014\n\001\000\170@\145\160\160KK\160\160\001\014\011\001\000\171@\145\160\160LL\160\160\001\014\012\001\000\172@\145\160\160MM\160\160\001\014\013\001\000\173@\145\160\160NN\160\160\001\014\014\001\000\174@\145\160\160OO\160\160\001\014\015\001\000\175@\145\160\160PP\160\160\001\014\016\001\000\176@\145\160\160QQ\160\160\001\014\017\001\000\177@\145\160\160RR\160\160\001\014\018\001\000\178@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\014\019\001\000\179@\145\160\160TT\160\160\001 \020\001\000\151\160\160\001\014\020\001\000\180@\145\160\160UU\160\160\001\014\021\001\000\181@\145\160\160VV\160\160\001\014\022\001\000\182@\145\160\160WW\160\160\001\014\023\001\000\183@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\014\024\001\000\184@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\014\025\001\000\185@\145\160\160ZZ\160\160\001\014\026\001\000\186@\145\160\160[[\160\160\001\014\027\001\000\187@\145\160\160\\\\\160\160\001 \028\001\000\147\160\160\001\014\028\001\000\188@\145\160\160]]\160\160\001 \029\001\000\148\160\160\001\014\029\001\000\189@\145\160\160^^\160\160\001\014\030\001\000\190@\145\160\160__\160\160\001\014\031\001\000\191@\145\160\160``\160\160\001\014 \001\000\192@\145\160\160aa\160\160\001\014!\001\000\193@\145\160\160bb\160\160\001 
\"\001\000\149\160\160\001\014\"\001\000\194@\145\160\160cc\160\160\001\014#\001\000\195@\145\160\160dd\160\160\001\014$\001\000\196@\145\160\160ee\160\160\001\014%\001\000\197@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\014&\001\000\198@\145\160\160gg\160\160\001\014'\001\000\199@\145\160\160hh\160\160\001\014(\001\000\200@\145\160\160ii\160\160\001\014)\001\000\201@\145\160\160jj\160\160\001\014*\001\000\202@\145\160\160kk\160\160\001\014+\001\000\203@\145\160\160ll\160\160\001\014,\001\000\204@\145\160\160mm\160\160\001\014-\001\000\205@\145\160\160nn\160\160\001\014.\001\000\206@\145\160\160oo\160\160\001\014/\001\000\207@\145\160\160pp\160\160\001\0140\001\000\208@\145\160\160qq\160\160\001\0141\001\000\209@\145\160\160rr\160\160\001\0142\001\000\210@\145\160\160ss\160\160\001\0143\001\000\211@\145\160\160tt\160\160\001\0144\001\000\212@\145\160\160uu\160\160\001\0145\001\000\213@\145\160\160vv\160\160\001\0146\001\000\214@\145\160\160ww\160\160\001\0147\001\000\215@\145\160\160xx\160\160\001\0148\001\000\216@\145\160\160yy\160\160\001\0149\001\000\217@\145\160\160zz\160\160\001\014:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\145\160\160\127\127\160\160\001\014?\001\000\223@\145\160\160\000@\000@\160\160\001\014@\001\000\224@\145\160\160\000A\000A\160\160\001\014A\001\000\225@\145\160\160\000B\000B\160\160\001\014B\001\000\226@\145\160\160\000C\000C\160\160\001\014C\001\000\227@\145\160\160\000D\000D\160\160\001\014D\001\000\228@\145\160\160\000E\000E\160\160\001\014E\001\000\229@\145\160\160\000F\000F\160\160\001\014F\001\000\230@\145\160\160\000G\000G\160\160\001\014G\001\000\231@\145\160\160\000H\000H\160\160\001\014H\001\000\232@\145\160\160\000I\000I\160\160\001\014I\001\000\233@\145\160\160\000J\000J\160\160\001\014J\001\000\234@\145\160\160\000K\000K\160\160\001\014K\001\000\235@\145\160\160\000L\000L\160\160\001\014L\001\000\236@\145\160\160\000M\000M\160\160\001\014M\001\000\237@\145\160\160\000N\000N\160\160\001\014N\001\000\238@\14
5\160\160\000O\000O\160\160\001\014O\001\000\239@\145\160\160\000P\000P\160\160\001\014P\001\000\240@\145\160\160\000Q\000Q\160\160\001\014Q\001\000\241@\145\160\160\000R\000R\160\160\001\014R\001\000\242@\145\160\160\000S\000S\160\160\001\014S\001\000\243@\145\160\160\000T\000T\160\160\001\014T\001\000\244@\145\160\160\000U\000U\160\160\001\014U\001\000\245@\145\160\160\000V\000V\160\160\001\014V\001\000\246@\145\160\160\000W\000W\160\160\001\014W\001\000\247@\145\160\160\000X\000X\160\160\001\014X\001\000\248@\145\160\160\000Y\000Y\160\160\001\014Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\014Z\001\000\250@\145\160\160\000[\000[\160\160\001\014[\001\000\251@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@@@@@@@@@\144\160\001 \172\001\000\128@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp875_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158\000\255`\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\000[n|hkaf\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\000]dji{\000^mo\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\000|le\000_~\127\001\000\168\001\003\134\001\003\136\001\003\137\001\000\160\001\003\138\001\003\140\001\003\142\001\003\143\000`zc\000@g}b\001\003\133\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\000\180\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\000\163\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\003\201\001\003\144\001\003\176\001 \024\001 \021\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\177\001\000\189\000\255\001\003\135\001 
\025\001\000\166\000\\\000\255\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\167\000\255\000\255\001\000\171\001\000\172pqrstuvwxy\001\000\179\001\000\169\000\255\000\255\001\000\187\001\000\159" 0 : int array);;
+let cp875_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\220\000\000\000\000\000\000\006\142\000\000\006\142\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\145\160\160U}\160\160\001 \021\001\000\207@\144\160Vr\144\160Wf\145\160\160XX\160\160\001 \024\001\000\206@\145\160\160YY\160\160\001 \025\001\000\222@@\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\1
33\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000j\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\145\160\160\001\000\133U\160\160\001\003\133\001\000\128@\145\160\160\001\000\134F\160\160\001\003\134\000q@\145\160\160\001\000\135W\160\160\001\003\135\001\000\221@\145\160\160\001\000\136h\160\160\001\003\136\000r@\145\160\160\001\000\137i\160\160\001\003\137\000s@\145\160\160\001\000\138j\160\160\001\003\138\000u@\144\160\001\000\139k\145\160\160\001\000\140l\160\160\001\003\140\000v@\144\160\001\000\141I\145\160\160\001\000\142J\160\160\001\003\142\000w@\145\160\160\001\000\143[\160\160\001\003\143\000x@\145\160\160\001\000\144p\160\160\001\003\144\001\000\204@\145\160\160\001\000\145q\160\160\001\003\145\000A@\145\160\160\001\000\146Z\160\160\001\003\146\000B@\145\160\160\001\000\147s\160\160\001\003\147\000C@\145\160\160\001\000\148t\160\160\001\003\148\000D@\145\160\160\001\000\149u\160\160\001\003\149\000E@\145\160\160\001\000\150v\160\160\001\003\150\000F@\145\160\160\001\000\151H\160\160\001\003\151\000G@\145\160\160\001\000\152x\160\160\001\003\152\000H@\145\160\160\001\000\153y\160\160\001\003\153\000I@\145\160\160\001\000\154z\160\160\001\003\154\000Q@\145\160\160\001\000\155{\160\160\001\003\155\000R@\145\160\160\001\000\156D\160\160\001\003\156\000S@\145\160\160\001\000\157T\160\160\001\003\157\000T@\145\160\160\001\000\1
58~\160\160\001\003\158\000U@\145\160\160\001\003\159\000V\160\160\001\000\159\001\000\255@\145\160\160\001\003\160\000W\160\160\001\000\160\000t@\144\160\001\003\161\000X@\145\160\160\001\003\163\000Y\160\160\001\000\163\001\000\176@\144\160\001\003\164\000b\144\160\001\003\165\000c\145\160\160\001\003\166\000d\160\160\001\000\166\001\000\223@\145\160\160\001\003\167\000e\160\160\001\000\167\001\000\235@\145\160\160\001\003\168\000f\160\160\001\000\168\000p@\145\160\160\001\003\169\000g\160\160\001\000\169\001\000\251@\144\160\001\003\170\000h\145\160\160\001\003\171\000i\160\160\001\000\171\001\000\238@\145\160\160\001\003\172\001\000\177\160\160\001\000\172\001\000\239@\145\160\160\001\003\173\001\000\178\160\160\001\000\173\001\000\202@\144\160\001\003\174\001\000\179\144\160\001\003\175\001\000\181\145\160\160\001\000\176\001\000\144\160\160\001\003\176\001\000\205@\145\160\160\001\003\177\001\000\138\160\160\001\000\177\001\000\218@\145\160\160\001\003\178\001\000\139\160\160\001\000\178\001\000\234@\145\160\160\001\003\179\001\000\140\160\160\001\000\179\001\000\250@\145\160\160\001\003\180\001\000\141\160\160\001\000\180\001\000\160@\144\160\001\003\181\001\000\142\144\160\001\003\182\001\000\143\144\160\001\003\183\001\000\154\144\160\001\003\184\001\000\155\144\160\001\003\185\001\000\156\144\160\001\003\186\001\000\157\145\160\160\001\003\187\001\000\158\160\160\001\000\187\001\000\254@\144\160\001\003\188\001\000\159\145\160\160\001\003\189\001\000\170\160\160\001\000\189\001\000\219@\144\160\001\003\190\001\000\171\144\160\001\003\191\001\000\172\144\160\001\003\192\001\000\173\144\160\001\003\193\001\000\174\144\160\001\003\194\001\000\186\144\160\001\003\195\001\000\175\144\160\001\003\196\001\000\187\144\160\001\003\197\001\000\188\144\160\001\003\198\001\000\189\144\160\001\003\199\001\000\190\144\160\001\003\200\001\000\191\144\160\001\003\201\001\000\203\144\160\001\003\202\001\000\180\144\160\001\003\203\001\000\184\144\160\001\003\204\001\000\18
2\144\160\001\003\205\001\000\183\144\160\001\003\206\001\000\185@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let adobe_standard_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdef\001 \025hijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\001 \024\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\161\001\000\162\001\000\163\001 D\001\000\165\001\001\146\001\000\167\001\000\164g\001 \028\001\000\171\001 9\001 :\002\000\000\251\001\002\000\000\251\002\000\255\001 \019\001 \001 !\001\000\183\000\255\001\000\182\001 \"\001 \026\001 \030\001 \029\001\000\187\001 &\001 0\000\255\001\000\191\000\255\000`\001\000\180\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\000\168\000\255\001\002\218\001\000\184\000\255\001\002\221\001\002\219\001\002\199\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\198\000\255\001\000\170\000\255\000\255\000\255\000\255\001\001A\001\000\216\001\001R\001\000\186\000\255\000\255\000\255\000\255\000\255\001\000\230\000\255\000\255\000\255\001\0011\000\255\000\255\001\001B\001\000\248\001\001S\001\000\223\000\255\000\255\000\255\000\255" 0 : int array);;
+let adobe_standard_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\004K\000\000\000\000\000\000\004&\000\000\004&\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\174\144\160\002\000\000\251\002\001\000\175@@@@@@@@@@@@@@@@\144\160\001 \019\001\000\177\144\160\001 \020\001\000\208@@@\144\160\001 \024\000`\144\160\001 \025g\144\160\001 \026\001\000\184@\144\160\001 \028\001\000\170\144\160\001 \029\001\000\186\144\160\001 \030\001\000\185@\145\160\160``\160\160\001 \001\000\178@\145\160\160aa\160\160\001 !\001\000\179@\145\160\160bb\160\160\001 \"\001\000\183@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160g\001\000\169\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\189@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\172@\145\160\160zz\160\160\001 :\001\000\173@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\232@\145\160\160\000B\000B\160\160\001\001B\001\000\248@\144\160\000C\000C\145\160\160\000D\000D\160\160\001 
D\001\000\164@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\234@\145\160\160\000S\000S\160\160\001\001S\001\000\250@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\001\000\193\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\166@@@@@@@@@@@@@@\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\168\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\200@\144\160\001\000\170\001\000\227\144\160\001\000\171\001\000\171@@@\144\160\001\000\175\001\000\197@@@@\144\160\001\000\180\001\000\194@\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\180\144\160\001\000\184\001\000\203@\144\160\001\000\186\001\000\235\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191@@@@@@\145\160\160\001\002\198\001\000\195\160\160\001\000\198\001\000\225@\144\160\001\002\199\001\000\207@@@@@@@@@@@@@@@@\145\160\160\001\002\216\001\000\198\160\160\001\000\216\001\000\233@\144\160\001\002\217\001\000\199\144\160\001\002\2
18\001\000\202\144\160\001\002\219\001\000\206\144\160\001\002\220\001\000\196\144\160\001\002\221\001\000\205@\144\160\001\000\223\001\000\251@@@@@@\144\160\001\000\230\001\000\241@@@@@@@@@@@@@@@@@\144\160\001\000\248\001\000\249@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let adobe_symbol_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\192\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`a\001\"\000c\001\"\003ef\001\"\011hi\001\"\023kl\001\"\018nopqrstuvwxyz{|}~\127\001\"E\001\003\145\001\003\146\001\003\167\001\003\148\001\003\149\001\003\166\001\003\147\001\003\151\001\003\153\001\003\209\001\003\154\001\003\155\001\003\156\001\003\157\001\003\159\001\003\160\001\003\152\001\003\161\001\003\163\001\003\164\001\003\165\001\003\194\001\003\169\001\003\158\001\003\168\001\003\150\000[\001\"4\000]\001\"\165\000_\002\000\000\248\229\001\003\177\001\003\178\001\003\199\001\003\180\001\003\181\001\003\198\001\003\179\001\003\183\001\003\185\001\003\213\001\003\186\001\003\187\001\003\188\001\003\189\001\003\191\001\003\192\001\003\184\001\003\193\001\003\195\001\003\196\001\003\197\001\003\214\001\003\201\001\003\190\001\003\200\001\003\182\000{\000|\000}\001\"<\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \172\001\003\210\001 2\001\"d\001 D\001\"\030\001\001\146\001&c\001&f\001&e\001&`\001!\148\001!\144\001!\145\001!\146\001!\147\001\000\176\001\000\177\001 3\001\"e\001\000\215\001\"\029\001\"\002\001 \"\001\000\247\001\"`\001\"a\001\"H\001 &\002\000\000\248\230\002\000\000\248\231\001!\181\001!5\001!\017\001!\028\001!\024\001\"\151\001\"\149\001\"\005\001\")\001\"*\001\"\131\001\"\135\001\"\132\001\"\130\001\"\134\001\"\008\001\"\t\001\" 
\001\"\007\002\000\000\246\218\002\000\000\246\217\002\000\000\246\219\001\"\015\001\"\026\001\"\197\001\000\172\001\"'\001\"(\001!\212\001!\208\001!\209\001!\210\001!\211\001%\202\001#)\002\000\000\248\232\002\000\000\248\233\002\000\000\248\234\001\"\017\002\000\000\248\235\002\000\000\248\236\002\000\000\248\237\002\000\000\248\238\002\000\000\248\239\002\000\000\248\240\002\000\000\248\241\002\000\000\248\242\002\000\000\248\243\002\000\000\248\244\000\255\001#*\001\"+\001# \002\000\000\248\245\001#!\002\000\000\248\246\002\000\000\248\247\002\000\000\248\248\002\000\000\248\249\002\000\000\248\250\002\000\000\248\251\002\000\000\248\252\002\000\000\248\253\002\000\000\248\254\000\255" 0 : int array);;
+let adobe_symbol_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\t\000\000\000\000\000\000\005)\000\000\005)\008\000\004\000\000\144\160\001\"\000b@\144\160\001\"\002\001\000\182\144\160\001\"\003d@\144\160\001\"\005\001\000\198@\144\160\001\"\007\001\000\209\144\160\001\"\008\001\000\206\144\160\001\"\t\001\000\207@\144\160\001\"\011g@@@\144\160\001\"\015\001\000\213@\145\160\160\001!\017\001\000\193\160\160\001\"\017\001\000\229@\144\160\001\"\018m@@@@\144\160\001\"\023j\144\160\001!\024\001\000\195@\144\160\001\"\026\001\000\214@\144\160\001!\028\001\000\194\144\160\001\"\029\001\000\181\144\160\001\"\030\001\000\165@\145\160\160``\160\160\001\" \001\000\208\160\160\001# \001\000\243@\145\160\160aa\160\160\001#!\001\000\245@\144\160\001 \"\001\000\183\144\160cc@\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160\001\"'\001\000\217\145\160\160hh\160\160\001\"(\001\000\218@\145\160\160ii\160\160\001\")\001\000\199\160\160\001#)\001\000\225@\145\160\160\001\"*\001\000\200\160\160\001#*\001\000\241@\145\160\160kk\160\160\001\"+\001\000\242@\144\160ll@\144\160nn\144\160oo\144\160pp\144\160qq\145\160\160rr\160\160\001 2\001\000\162@\145\160\160ss\160\160\001 3\001\000\178@\145\160\160tt\160\160\001\"4\000\\@\145\160\160uu\160\160\001!5\001\000\192@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001\"<\000~@\144\160}}\144\160~~\144\160\127\127@@@@\144\160\001 
D\001\000\164\144\160\001\"E\000@@@\144\160\001\"H\001\000\187@@@@@@@@@@@@@@@@@@\144\160\000[\000[@\144\160\000]\000]@\144\160\000_\000_\145\160\160\001&`\001\000\170\160\160\001\"`\001\000\185@\144\160\001\"a\001\000\186@\144\160\001&c\001\000\167\144\160\001\"d\001\000\163\145\160\160\001&e\001\000\169\160\160\001\"e\001\000\179@\144\160\001&f\001\000\168@@@@@@@@@@@@@@@@@@@@\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}@@@@\144\160\001\"\130\001\000\204\144\160\001\"\131\001\000\201\144\160\001\"\132\001\000\203@\144\160\001\"\134\001\000\205\144\160\001\"\135\001\000\202@@@@@@@@\144\160\001!\144\001\000\172\145\160\160\001\003\145\000A\160\160\001!\145\001\000\173@\145\160\160\001\003\146\000B\160\160\001\001\146\001\000\166\160\160\001!\146\001\000\174@\145\160\160\001\003\147\000G\160\160\001!\147\001\000\175@\145\160\160\001\003\148\000D\160\160\001!\148\001\000\171@\145\160\160\001\003\149\000E\160\160\001\"\149\001\000\197@\144\160\001\003\150\000Z\145\160\160\001\003\151\000H\160\160\001\"\151\001\000\196@\144\160\001\003\152\000Q\144\160\001\003\153\000I\144\160\001\003\154\000K\144\160\001\003\155\000L\144\160\001\003\156\000M\144\160\001\003\157\000N\144\160\001\003\158\000X\144\160\001\003\159\000O\144\160\001\003\160\000P\144\160\001\003\161\000R@\144\160\001\003\163\000S\144\160\001\003\164\000T\145\160\160\001\003\165\000U\160\160\001\"\165\000^@\144\160\001\003\166\000F\144\160\001\003\167\000C\144\160\001\003\168\000Y\144\160\001\003\169\000W@@\145\160\160\001 
\172\001\000\160\160\160\001\000\172\001\000\216@@@@\144\160\001\000\176\001\000\176\145\160\160\001\003\177\000a\160\160\001\000\177\001\000\177@\144\160\001\003\178\000b\144\160\001\003\179\000g\144\160\001\003\180\000d\145\160\160\001\003\181\000e\160\160\001!\181\001\000\191@\144\160\001\003\182\000z\144\160\001\003\183\000h\144\160\001\003\184\000q\144\160\001\003\185\000i\144\160\001\003\186\000k\144\160\001\003\187\000l\144\160\001\003\188\000m\144\160\001\003\189\000n\144\160\001\003\190\000x\144\160\001\003\191\000o\144\160\001\003\192\000p\144\160\001\003\193\000r\144\160\001\003\194\000V\144\160\001\003\195\000s\144\160\001\003\196\000t\145\160\160\001\003\197\000u\160\160\001\"\197\001\000\215@\144\160\001\003\198\000f\144\160\001\003\199\000c\144\160\001\003\200\000y\144\160\001\003\201\000w\144\160\001%\202\001\000\224@@@@@\144\160\001!\208\001\000\220\145\160\160\001\003\209\000J\160\160\001!\209\001\000\221@\145\160\160\001\003\210\001\000\161\160\160\001!\210\001\000\222@\144\160\001!\211\001\000\223\144\160\001!\212\001\000\219\144\160\001\003\213\000j\144\160\001\003\214\000v\144\160\001\000\215\001\000\180@\144\160\002\000\000\246\217\001\000\211\144\160\002\000\000\246\218\001\000\210\144\160\002\000\000\246\219\001\000\212@@@@@@@@@\144\160\002\000\000\248\229\000`\144\160\002\000\000\248\230\001\000\189\144\160\002\000\000\248\231\001\000\190\144\160\002\000\000\248\232\001\000\226\144\160\002\000\000\248\233\001\000\227\144\160\002\000\000\248\234\001\000\228\144\160\002\000\000\248\235\001\000\230\144\160\002\000\000\248\236\001\000\231\144\160\002\000\000\248\237\001\000\232\144\160\002\000\000\248\238\001\000\233\144\160\002\000\000\248\239\001\000\234\144\160\002\000\000\248\240\001\000\235\144\160\002\000\000\248\241\001\000\236\144\160\002\000\000\248\242\001\000\237\144\160\002\000\000\248\243\001\000\238\144\160\002\000\000\248\244\001\000\239\144\160\002\000\000\248\245\001\000\244\144\160\002\000\000\248\246\001\000\246\145\160\160\0
01\000\247\001\000\184\160\160\002\000\000\248\247\001\000\247@\144\160\002\000\000\248\248\001\000\248\144\160\002\000\000\248\249\001\000\249\144\160\002\000\000\248\250\001\000\250\144\160\002\000\000\248\251\001\000\251\144\160\002\000\000\248\252\001\000\252\144\160\002\000\000\248\253\001\000\253\144\160\002\000\000\248\254\001\000\254@" 0 : Netmappings.from_uni_list array);;
+ let adobe_zapf_dingbats_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\233\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`\001'\001\001'\002\001'\003\001'\004\001&\014\001'\006\001'\007\001'\008\001'\t\001&\027\001&\030\001'\012\001'\013\001'\014\001'\015\001'\016\001'\017\001'\018\001'\019\001'\020\001'\021\001'\022\001'\023\001'\024\001'\025\001'\026\001'\027\001'\028\001'\029\001'\030\001'\031\001' \001'!\001'\"\001'#\001'$\001'%\001'&\001''\001&\005\001')\001'*\001'+\001',\001'-\001'.\001'/\001'0\001'1\001'2\001'3\001'4\001'5\001'6\001'7\001'8\001'9\001':\001';\001'<\001'=\001'>\001'?\001'@\001'A\001'B\001'C\001'D\001'E\001'F\001'G\001'H\001'I\001'J\001'K\001%\207\001'M\001%\160\001'O\001'P\001'Q\001'R\001%\178\001%\188\001%\198\001'V\001%\215\001'X\001'Y\001'Z\001'[\001'\\\001']\001'^\000\255\002\000\000\248\215\002\000\000\248\216\002\000\000\248\217\002\000\000\248\218\002\000\000\248\219\002\000\000\248\220\002\000\000\248\221\002\000\000\248\222\002\000\000\248\223\002\000\000\248\224\002\000\000\248\225\002\000\000\248\226\002\000\000\248\227\002\000\000\248\228\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001'a\001'b\001'c\001'd\001'e\001'f\001'g\001&c\001&f\001&e\001&`\001$`\001$a\001$b\001$c\001$d\001$e\001$f\001$g\001$h\001$i\001'v\001'w\001'x\001'y\001'z\001'{\001'|\001'}\001'~\001'\127\001'\128\001'\129\001'\130\001'\131\001'\132\001'\133\001'\134\001'\135\001'\136\001'\137\001'\138\001'\139\001'\140\001'\141\001'\142\001'\143\001'\144\001'\145\001'\146\001'\147\001'\148\001!\146\001!\148\001!\149\001'\152\001'\153\001'\154\001'\155\001'\156
\001'\157\001'\158\001'\159\001'\160\001'\161\001'\162\001'\163\001'\164\001'\165\001'\166\001'\167\001'\168\001'\169\001'\170\001'\171\001'\172\001'\173\001'\174\001'\175\000\255\001'\177\001'\178\001'\179\001'\180\001'\181\001'\182\001'\183\001'\184\001'\185\001'\186\001'\187\001'\188\001'\189\001'\190\000\255" 0 : int array);;
+let adobe_zapf_dingbats_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006_\000\000\000\000\000\000\005>\000\000\005>\008\000\004\000\000@\144\160\001'\001a\144\160\001'\002b\144\160\001'\003c\144\160\001'\004d\144\160\001&\005\000H\144\160\001'\006f\144\160\001'\007g\144\160\001'\008h\144\160\001'\ti@@\144\160\001'\012l\144\160\001'\013m\145\160\160\001&\014e\160\160\001'\014n@\144\160\001'\015o\144\160\001'\016p\144\160\001'\017q\144\160\001'\018r\144\160\001'\019s\144\160\001'\020t\144\160\001'\021u\144\160\001'\022v\144\160\001'\023w\144\160\001'\024x\144\160\001'\025y\144\160\001'\026z\145\160\160\001&\027j\160\160\001'\027{@\144\160\001'\028|\144\160\001'\029}\145\160\160\001&\030k\160\160\001'\030~@\144\160\001'\031\127\145\160\160``\160\160\001' \000@@\144\160\001'!\000A\144\160\001'\"\000B\144\160\001'#\000C\144\160\001'$\000D\144\160\001'%\000E\144\160\001'&\000F\144\160\001''\000G@\144\160\001')\000I\144\160\001'*\000J\144\160\001'+\000K\144\160\001',\000L\144\160\001'-\000M\144\160\001'.\000N\144\160\001'/\000O\144\160\001'0\000P\144\160\001'1\000Q\144\160\001'2\000R\144\160\001'3\000S\144\160\001'4\000T\144\160\001'5\000U\144\160\001'6\000V\144\160\001'7\000W\144\160\001'8\000X\144\160\001'9\000Y\144\160\001':\000Z\144\160\001';\000[\144\160\001'<\000\\\144\160\001'=\000]\144\160\001'>\000^\144\160\001'?\000_\144\160\001'@\000`\144\160\001'A\000a\144\160\001'B\000b\144\160\001'C\000c\144\160\001'D\000d\144\160\001'E\000e\144\160\001'F\000f\144\160\001'G\000g\144\160\001'H\000h\144\160\001'I\000i\144\160\001'J\000j\144\160\001'K\000k@\144\160\001'M\000m@\144\160\001'O\000o\144\160\001'P\000p\144\160\001'Q\000q\144\160\001'R\000r@@@\144\160\001'V\000v@\144\160\001'X\000x\144\160\001'Y\000y\144\160\001'Z\000z\144\160\001'[\000{\144\160\001'\\\000|\144\160\001']\000}\144\160\001'^\000~@\145\160\160\001&`\001\000\171\160\160\001$`\001\000\172@\145\160\160\001'a\001\000\161\160\160\001$a\001\000\173@\145\160\160\001'b\001\000\162
\160\160\001$b\001\000\174@\145\160\160\001'c\001\000\163\160\160\001&c\001\000\168\160\160\001$c\001\000\175@\145\160\160\001'd\001\000\164\160\160\001$d\001\000\176@\145\160\160\001'e\001\000\165\160\160\001&e\001\000\170\160\160\001$e\001\000\177@\145\160\160\001'f\001\000\166\160\160\001&f\001\000\169\160\160\001$f\001\000\178@\145\160\160\001'g\001\000\167\160\160\001$g\001\000\179@\144\160\001$h\001\000\180\144\160\001$i\001\000\181@@@@@@@@@@@@\144\160\001'v\001\000\182\144\160\001'w\001\000\183\144\160\001'x\001\000\184\144\160\001'y\001\000\185\144\160\001'z\001\000\186\144\160\001'{\001\000\187\144\160\001'|\001\000\188\144\160\001'}\001\000\189\144\160\001'~\001\000\190\144\160\001'\127\001\000\191\144\160\001'\128\001\000\192\144\160\001'\129\001\000\193\144\160\001'\130\001\000\194\144\160\001'\131\001\000\195\144\160\001'\132\001\000\196\144\160\001'\133\001\000\197\144\160\001'\134\001\000\198\144\160\001'\135\001\000\199\144\160\001'\136\001\000\200\144\160\001'\137\001\000\201\144\160\001'\138\001\000\202\144\160\001'\139\001\000\203\144\160\001'\140\001\000\204\144\160\001'\141\001\000\205\144\160\001'\142\001\000\206\144\160\001'\143\001\000\207\144\160\001'\144\001\000\208\144\160\001'\145\001\000\209\145\160\160\001'\146\001\000\210\160\160\001!\146\001\000\213@\144\160\001'\147\001\000\211\145\160\160\001'\148\001\000\212\160\160\001!\148\001\000\214@\144\160\001!\149\001\000\215@@\144\160\001'\152\001\000\216\144\160\001'\153\001\000\217\144\160\001'\154\001\000\218\144\160\001'\155\001\000\219\144\160\001'\156\001\000\220\144\160\001'\157\001\000\221\144\160\001'\158\001\000\222\144\160\001'\159\001\000\223\145\160\160\001%\160\000n\160\160\001'\160\001\000\224@\144\160\001'\161\001\000\225\144\160\001'\162\001\000\226\144\160\001'\163\001\000\227\144\160\001'\164\001\000\228\144\160\001'\165\001\000\229\144\160\001'\166\001\000\230\144\160\001'\167\001\000\231\144\160\001'\168\001\000\232\144\160\001'\169\001\000\233\144\160\001'\170\001\000\
234\144\160\001'\171\001\000\235\144\160\001'\172\001\000\236\144\160\001'\173\001\000\237\144\160\001'\174\001\000\238\144\160\001'\175\001\000\239@\144\160\001'\177\001\000\241\145\160\160\001%\178\000s\160\160\001'\178\001\000\242@\144\160\001'\179\001\000\243\144\160\001'\180\001\000\244\144\160\001'\181\001\000\245\144\160\001'\182\001\000\246\144\160\001'\183\001\000\247\144\160\001'\184\001\000\248\144\160\001'\185\001\000\249\144\160\001'\186\001\000\250\144\160\001'\187\001\000\251\145\160\160\001%\188\000t\160\160\001'\188\001\000\252@\144\160\001'\189\001\000\253\144\160\001'\190\001\000\254@@@@@@@\144\160\001%\198\000u@@@@@@@@\144\160\001%\207\000l@@@@@@@\145\160\160\001%\215\000w\160\160\002\000\000\248\215\001\000\128@\144\160\002\000\000\248\216\001\000\129\144\160\002\000\000\248\217\001\000\130\144\160\002\000\000\248\218\001\000\131\144\160\002\000\000\248\219\001\000\132\144\160\002\000\000\248\220\001\000\133\144\160\002\000\000\248\221\001\000\134\144\160\002\000\000\248\222\001\000\135\144\160\002\000\000\248\223\001\000\136\144\160\002\000\000\248\224\001\000\137\144\160\002\000\000\248\225\001\000\138\144\160\002\000\000\248\226\001\000\139\144\160\002\000\000\248\227\001\000\140\144\160\002\000\000\248\228\001\000\141@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let jis0201_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\164\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\001\000\165\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\001 >\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\002\000\000\255a\002\000\000\255b\002\000\000\255c\002\000\000\255d\002\000\000\255e\002\000\000\255f\002\000\000\255g\002\000\000\255h\002\000\000\255i\002\000\000\255j\002\000\000\255k\002\000\000\255l\002\000\000\255m\002\000\000\255n\002\000\000\255o\002\000\000\255p\002\000\000\255q\002\000\000\255r\002\000\000\255s\002\000\000\255t\002\000\000\255u\002\000\000\255v\002\000\000\255w\002\000\000\255x\002\000\000\255y\002\000\000\255z\002\000\000\255{\002\000\000\255|\002\000\000\255}\002\000\000\255~\002\000\000\255\127\002\000\000\255\128\002\000\000\255\129\002\000\000\255\130\002\000\000\255\131\002\000\000\255\132\002\000\000\255\133\002\000\000\255\134\002\000\000\255\135\002\000\000\255\136\002\000\000\255\137\002\000\000\255\138\002\000\000\255\139\002\000\000\255\140\002\000\000\255\141\002\000\000\255\142\002\000\000\255\143\002\000\000\255\144\002\000\000\255\145\002\000\000\255\146\002\000\000\255\147\002\000\000\255\148\002\000\0
00\255\149\002\000\000\255\150\002\000\000\255\151\002\000\000\255\152\002\000\000\255\153\002\000\000\255\154\002\000\000\255\155\002\000\000\255\156\002\000\000\255\157\002\000\000\255\158\002\000\000\255\159\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255" 0 : int array);;
+let jis0201_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0053\000\000\000\000\000\000\004\143\000\000\004\143\008\000\004\000\000@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\145\160\160~~\160\160\001 >\000~@\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[@\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\145\160\160\000a\000a\160\160\002\000\000\255a\001\000\161@\145\160\160\000b\000b\160\160\002\000\000\255b\001\000\162@\145\160\160\000c\000c\160\160\002\000\000\255c\001\000\163@\145\160\160\000d\000d\160\160\002\000\000\255d\001\000\164@\145\160\160\000e\000e\160\160\002\000\000\255e\001\000\165@\145\160\160\000f\000f\160\160\002\000\000\255f\001\000\166@\145\160\160\000g\000g\160\160\002\000\000\255g\001\000\167@\145\160\160\000h\000h\160\160\002\000\000\255h\001\000\168@\145\160\160\000i\000i\160\160\002\000\000\255i\001\000\169@\145\160\160\000j\000j\160\160\002\000\000\255j\001\000\170@\145\160\160\000k\000k\160\160\002\000\000\255k\001\000\171@\145\160\160\000l\000l\160\160\002\000\000\255l\001\000\172@\145\160\160\000m\000m\160\160\002\000\000\255m\001\000\173@\145\160\160\000n\000n\160\160\002\000\000\255n\001\000\174@\145\160\160\000o\000o\160\160\002\000\000\255o\0
01\000\175@\145\160\160\000p\000p\160\160\002\000\000\255p\001\000\176@\145\160\160\000q\000q\160\160\002\000\000\255q\001\000\177@\145\160\160\000r\000r\160\160\002\000\000\255r\001\000\178@\145\160\160\000s\000s\160\160\002\000\000\255s\001\000\179@\145\160\160\000t\000t\160\160\002\000\000\255t\001\000\180@\145\160\160\000u\000u\160\160\002\000\000\255u\001\000\181@\145\160\160\000v\000v\160\160\002\000\000\255v\001\000\182@\145\160\160\000w\000w\160\160\002\000\000\255w\001\000\183@\145\160\160\000x\000x\160\160\002\000\000\255x\001\000\184@\145\160\160\000y\000y\160\160\002\000\000\255y\001\000\185@\145\160\160\000z\000z\160\160\002\000\000\255z\001\000\186@\145\160\160\000{\000{\160\160\002\000\000\255{\001\000\187@\145\160\160\000|\000|\160\160\002\000\000\255|\001\000\188@\145\160\160\000}\000}\160\160\002\000\000\255}\001\000\189@\144\160\002\000\000\255~\001\000\190\144\160\002\000\000\255\127\001\000\191\144\160\002\000\000\255\128\001\000\192\144\160\002\000\000\255\129\001\000\193\144\160\002\000\000\255\130\001\000\194\144\160\002\000\000\255\131\001\000\195\144\160\002\000\000\255\132\001\000\196\144\160\002\000\000\255\133\001\000\197\144\160\002\000\000\255\134\001\000\198\144\160\002\000\000\255\135\001\000\199\144\160\002\000\000\255\136\001\000\200\144\160\002\000\000\255\137\001\000\201\144\160\002\000\000\255\138\001\000\202\144\160\002\000\000\255\139\001\000\203\144\160\002\000\000\255\140\001\000\204\144\160\002\000\000\255\141\001\000\205\144\160\002\000\000\255\142\001\000\206\144\160\002\000\000\255\143\001\000\207\144\160\002\000\000\255\144\001\000\208\144\160\002\000\000\255\145\001\000\209\144\160\002\000\000\255\146\001\000\210\144\160\002\000\000\255\147\001\000\211\144\160\002\000\000\255\148\001\000\212\144\160\002\000\000\255\149\001\000\213\144\160\002\000\000\255\150\001\000\214\144\160\002\000\000\255\151\001\000\215\144\160\002\000\000\255\152\001\000\216\144\160\002\000\000\255\153\001\000\217\144\160\002\000\000\255\154\001
\000\218\144\160\002\000\000\255\155\001\000\219\144\160\002\000\000\255\156\001\000\220\144\160\002\000\000\255\157\001\000\221\144\160\002\000\000\255\158\001\000\222\144\160\002\000\000\255\159\001\000\223@@@@@\144\160\001\000\165\000\\@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let koi8r_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001%\000\001%\002\001%\012\001%\016\001%\020\001%\024\001%\028\001%$\001%,\001%4\001%<\001%\128\001%\132\001%\136\001%\140\001%\144\001%\145\001%\146\001%\147\001# \001%\160\001\"\025\001\"\026\001\"H\001\"d\001\"e\001\000\160\001#!\001\000\176\001\000\178\001\000\183\001\000\247\001%P\001%Q\001%R\001\004Q\001%S\001%T\001%U\001%V\001%W\001%X\001%Y\001%Z\001%[\001%\\\001%]\001%^\001%_\001%`\001%a\001\004\001\001%b\001%c\001%d\001%e\001%f\001%g\001%h\001%i\001%j\001%k\001%l\001\000\169\001\004N\001\0040\001\0041\001\004F\001\0044\001\0045\001\004D\001\0043\001\004E\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004O\001\004@\001\004A\001\004B\001\004C\001\0046\001\0042\001\004L\001\004K\001\0047\001\004H\001\004M\001\004I\001\004G\001\004J\001\004.\001\004\016\001\004\017\001\004&\001\004\020\001\004\021\001\004$\001\004\019\001\004%\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004/\001\004 \001\004!\001\004\"\001\004#\001\004\022\001\004\018\001\004,\001\004+\001\004\023\001\004(\001\004-\001\004)\001\004'\001\004*" 0 : int array);;
+let koi8r_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\187\000\000\000\000\000\000\007\153\000\000\007\153\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\128@\145\160\160AA\160\160\001\004\001\001\000\179@\145\160\160BB\160\160\001%\002\001\000\129@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\130@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\131\160\160\001\004\016\001\000\225@\145\160\160QQ\160\160\001\004\017\001\000\226@\145\160\160RR\160\160\001\004\018\001\000\247@\145\160\160SS\160\160\001\004\019\001\000\231@\145\160\160TT\160\160\001%\020\001\000\132\160\160\001\004\020\001\000\228@\145\160\160UU\160\160\001\004\021\001\000\229@\145\160\160VV\160\160\001\004\022\001\000\246@\145\160\160WW\160\160\001\004\023\001\000\250@\145\160\160XX\160\160\001%\024\001\000\133\160\160\001\004\024\001\000\233@\145\160\160YY\160\160\001\"\025\001\000\149\160\160\001\004\025\001\000\234@\145\160\160ZZ\160\160\001\"\026\001\000\150\160\160\001\004\026\001\000\235@\145\160\160[[\160\160\001\004\027\001\000\236@\145\160\160\\\\\160\160\001%\028\001\000\134\160\160\001\004\028\001\000\237@\145\160\160]]\160\160\001\004\029\001\000\238@\145\160\160^^\160\160\001\004\030\001\000\239@\145\160\160__\160\160\001\004\031\001\000\240@\145\160\160``\160\160\001# \001\000\147\160\160\001\004 
\001\000\242@\145\160\160aa\160\160\001#!\001\000\155\160\160\001\004!\001\000\243@\145\160\160bb\160\160\001\004\"\001\000\244@\145\160\160cc\160\160\001\004#\001\000\245@\145\160\160dd\160\160\001%$\001\000\135\160\160\001\004$\001\000\230@\145\160\160ee\160\160\001\004%\001\000\232@\145\160\160ff\160\160\001\004&\001\000\227@\145\160\160gg\160\160\001\004'\001\000\254@\145\160\160hh\160\160\001\004(\001\000\251@\145\160\160ii\160\160\001\004)\001\000\253@\145\160\160jj\160\160\001\004*\001\000\255@\145\160\160kk\160\160\001\004+\001\000\249@\145\160\160ll\160\160\001%,\001\000\136\160\160\001\004,\001\000\248@\145\160\160mm\160\160\001\004-\001\000\252@\145\160\160nn\160\160\001\004.\001\000\224@\145\160\160oo\160\160\001\004/\001\000\241@\145\160\160pp\160\160\001\0040\001\000\193@\145\160\160qq\160\160\001\0041\001\000\194@\145\160\160rr\160\160\001\0042\001\000\215@\145\160\160ss\160\160\001\0043\001\000\199@\145\160\160tt\160\160\001%4\001\000\137\160\160\001\0044\001\000\196@\145\160\160uu\160\160\001\0045\001\000\197@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\218@\145\160\160xx\160\160\001\0048\001\000\201@\145\160\160yy\160\160\001\0049\001\000\202@\145\160\160zz\160\160\001\004:\001\000\203@\145\160\160{{\160\160\001\004;\001\000\204@\145\160\160||\160\160\001%<\001\000\138\160\160\001\004<\001\000\205@\145\160\160}}\160\160\001\004=\001\000\206@\145\160\160~~\160\160\001\004>\001\000\207@\145\160\160\127\127\160\160\001\004?\001\000\208@\145\160\160\000@\000@\160\160\001\004@\001\000\210@\145\160\160\000A\000A\160\160\001\004A\001\000\211@\145\160\160\000B\000B\160\160\001\004B\001\000\212@\145\160\160\000C\000C\160\160\001\004C\001\000\213@\145\160\160\000D\000D\160\160\001\004D\001\000\198@\145\160\160\000E\000E\160\160\001\004E\001\000\200@\145\160\160\000F\000F\160\160\001\004F\001\000\195@\145\160\160\000G\000G\160\160\001\004G\001\000\222@\145\160\160\000H\000H\160\160\001\"H\001\000\151\160\160\001\004H\001
\000\219@\145\160\160\000I\000I\160\160\001\004I\001\000\221@\145\160\160\000J\000J\160\160\001\004J\001\000\223@\145\160\160\000K\000K\160\160\001\004K\001\000\217@\145\160\160\000L\000L\160\160\001\004L\001\000\216@\145\160\160\000M\000M\160\160\001\004M\001\000\220@\145\160\160\000N\000N\160\160\001\004N\001\000\192@\145\160\160\000O\000O\160\160\001\004O\001\000\209@\145\160\160\000P\000P\160\160\001%P\001\000\160@\145\160\160\000Q\000Q\160\160\001%Q\001\000\161\160\160\001\004Q\001\000\163@\145\160\160\000R\000R\160\160\001%R\001\000\162@\145\160\160\000S\000S\160\160\001%S\001\000\164@\145\160\160\000T\000T\160\160\001%T\001\000\165@\145\160\160\000U\000U\160\160\001%U\001\000\166@\145\160\160\000V\000V\160\160\001%V\001\000\167@\145\160\160\000W\000W\160\160\001%W\001\000\168@\145\160\160\000X\000X\160\160\001%X\001\000\169@\145\160\160\000Y\000Y\160\160\001%Y\001\000\170@\145\160\160\000Z\000Z\160\160\001%Z\001\000\171@\145\160\160\000[\000[\160\160\001%[\001\000\172@\145\160\160\000\\\000\\\160\160\001%\\\001\000\173@\145\160\160\000]\000]\160\160\001%]\001\000\174@\145\160\160\000^\000^\160\160\001%^\001\000\175@\145\160\160\000_\000_\160\160\001%_\001\000\176@\145\160\160\000`\000`\160\160\001%`\001\000\177@\145\160\160\000a\000a\160\160\001%a\001\000\178@\145\160\160\000b\000b\160\160\001%b\001\000\180@\145\160\160\000c\000c\160\160\001%c\001\000\181@\145\160\160\000d\000d\160\160\001\"d\001\000\152\160\160\001%d\001\000\182@\145\160\160\000e\000e\160\160\001\"e\001\000\153\160\160\001%e\001\000\183@\145\160\160\000f\000f\160\160\001%f\001\000\184@\145\160\160\000g\000g\160\160\001%g\001\000\185@\145\160\160\000h\000h\160\160\001%h\001\000\186@\145\160\160\000i\000i\160\160\001%i\001\000\187@\145\160\160\000j\000j\160\160\001%j\001\000\188@\145\160\160\000k\000k\160\160\001%k\001\000\189@\145\160\160\000l\000l\160\160\001%l\001\000\190@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000
s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\139@@@\144\160\001%\132\001\000\140@@@\144\160\001%\136\001\000\141@@@\144\160\001%\140\001\000\142@@@\144\160\001%\144\001\000\143\144\160\001%\145\001\000\144\144\160\001%\146\001\000\145\144\160\001%\147\001\000\146@@@@@@@@@@@@\145\160\160\001%\160\001\000\148\160\160\001\000\160\001\000\154@@@@@@@@@\144\160\001\000\169\001\000\191@@@@@@\144\160\001\000\176\001\000\156@\144\160\001\000\178\001\000\157@@@@\144\160\001\000\183\001\000\158@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\159@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let macroman_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002k\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\001\000\196\001\000\197\001\000\199\001\000\201\001\000\209\001\000\214\001\000\220\001\000\225\001\000\224\001\000\226\001\000\228\001\000\227\001\000\229\001\000\231\001\000\233\001\000\232\001\000\234\001\000\235\001\000\237\001\000\236\001\000\238\001\000\239\001\000\241\001\000\243\001\000\242\001\000\244\001\000\246\001\000\245\001\000\250\001\000\249\001\000\251\001\000\252\001 \001\000\176\001\000\162\001\000\163\001\000\167\001 \"\001\000\182\001\000\223\001\000\174\001\000\169\001!\"\001\000\180\001\000\168\001\"`\001\000\198\001\000\216\001\"\030\001\000\177\001\"d\001\"e\001\000\165\001\000\181\001\"\002\001\"\017\001\"\015\001\003\192\001\"+\001\000\170\001\000\186\001\003\169\001\000\230\001\000\248\001\000\191\001\000\161\001\000\172\001\"\026\001\001\146\001\"H\001\"\006\001\000\171\001\000\187\001 &\001\000\160\001\000\192\001\000\195\001\000\213\001\001R\001\001S\001 \019\001 \020\001 \028\001 \029\001 \024\001 \025\001\000\247\001%\202\001\000\255\001\001x\001 D\001 \172\001 9\001 :\002\000\000\251\001\002\000\000\251\002\001 !\001\000\183\001 \026\001 \030\001 
0\001\000\194\001\000\202\001\000\193\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\000\211\001\000\212\002\000\000\248\255\001\000\210\001\000\218\001\000\219\001\000\217\001\0011\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\002\218\001\000\184\001\002\221\001\002\219\001\002\199" 0 : int array);;
+let macroman_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\135\000\000\000\000\000\000\005\221\000\000\005\221\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\222\145\160\160\001\"\002\001\000\182\160\160\002\000\000\251\002\001\000\223@@@@\144\160\001\"\006\001\000\198@@@@@@@@\144\160\001\"\015\001\000\184@\144\160\001\"\017\001\000\183@\144\160\001 \019\001\000\208\144\160\001 \020\001\000\209@@@\144\160\001 \024\001\000\212\144\160\001 \025\001\000\213\145\160\160\001\"\026\001\000\195\160\160\001 \026\001\000\226@@\144\160\001 \028\001\000\210\144\160\001 \029\001\000\211\145\160\160\001\"\030\001\000\176\160\160\001 \030\001\000\227@@\145\160\160``\160\160\001 \001\000\160@\145\160\160aa\160\160\001 !\001\000\224@\145\160\160bb\160\160\001 \"\001\000\165\160\160\001!\"\001\000\170@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\201@\144\160gg\144\160hh\144\160ii\144\160jj\145\160\160kk\160\160\001\"+\001\000\186@\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\228@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\220@\145\160\160zz\160\160\001 :\001\000\221@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\145\160\160\000D\000D\160\160\001 
D\001\000\218@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\197@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\206@\145\160\160\000S\000S\160\160\001\001S\001\000\207@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\"`\001\000\173@\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\145\160\160\000d\000d\160\160\001\"d\001\000\178@\145\160\160\000e\000e\160\160\001\"e\001\000\179@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\217@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\196@@@@@@@@@@@@@\144\160\001\000\160\001\000\202\144\160\001\000\161\001\000\193\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\180@\144\160\001\000\167\001\000\164\144\160\001\000\168\001\000\172\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\189@\144\160\001\000\170\001\000\187\144\160\001\000\171\001\000\199\145\160\160\001\000\172\001\000\194\160\160\001 
\172\001\000\219@@\144\160\001\000\174\001\000\168\144\160\001\000\175\001\000\248\144\160\001\000\176\001\000\161\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\171\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\166\144\160\001\000\183\001\000\225\144\160\001\000\184\001\000\252@\144\160\001\000\186\001\000\188\144\160\001\000\187\001\000\200@@@\144\160\001\000\191\001\000\192\145\160\160\001\003\192\001\000\185\160\160\001\000\192\001\000\203@\144\160\001\000\193\001\000\231\144\160\001\000\194\001\000\229\144\160\001\000\195\001\000\204\144\160\001\000\196\001\000\128\144\160\001\000\197\001\000\129\145\160\160\001\000\198\001\000\174\160\160\001\002\198\001\000\246@\145\160\160\001\000\199\001\000\130\160\160\001\002\199\001\000\255@\144\160\001\000\200\001\000\233\144\160\001\000\201\001\000\131\145\160\160\001%\202\001\000\215\160\160\001\000\202\001\000\230@\144\160\001\000\203\001\000\232\144\160\001\000\204\001\000\237\144\160\001\000\205\001\000\234\144\160\001\000\206\001\000\235\144\160\001\000\207\001\000\236@\144\160\001\000\209\001\000\132\144\160\001\000\210\001\000\241\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\239\144\160\001\000\213\001\000\205\144\160\001\000\214\001\000\133@\145\160\160\001\000\216\001\000\175\160\160\001\002\216\001\000\249@\145\160\160\001\000\217\001\000\244\160\160\001\002\217\001\000\250@\145\160\160\001\000\218\001\000\242\160\160\001\002\218\001\000\251@\145\160\160\001\000\219\001\000\243\160\160\001\002\219\001\000\254@\145\160\160\001\000\220\001\000\134\160\160\001\002\220\001\000\247@\144\160\001\002\221\001\000\253@\144\160\001\000\223\001\000\167\144\160\001\000\224\001\000\136\144\160\001\000\225\001\000\135\144\160\001\000\226\001\000\137\144\160\001\000\227\001\000\139\144\160\001\000\228\001\000\138\144\160\001\000\229\001\000\140\144\160\001\000\230\001\000\190\144\160\001\000\231\001\000\141\144\160\001\000\232\001\000\143\144\160\001\000\233\001\000\142\144\160\00
1\000\234\001\000\144\144\160\001\000\235\001\000\145\144\160\001\000\236\001\000\147\144\160\001\000\237\001\000\146\144\160\001\000\238\001\000\148\144\160\001\000\239\001\000\149@\144\160\001\000\241\001\000\150\144\160\001\000\242\001\000\152\144\160\001\000\243\001\000\151\144\160\001\000\244\001\000\153\144\160\001\000\245\001\000\155\144\160\001\000\246\001\000\154\144\160\001\000\247\001\000\214\144\160\001\000\248\001\000\191\144\160\001\000\249\001\000\157\144\160\001\000\250\001\000\156\144\160\001\000\251\001\000\158\144\160\001\000\252\001\000\159@@\145\160\160\001\000\255\001\000\216\160\160\002\000\000\248\255\001\000\240@" 0 : Netmappings.from_uni_list array);;
+ let windows1250_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001 \001 !\000\255\001 0\001\001`\001 9\001\001Z\001\001d\001\001}\001\001y\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\001a\001 :\001\001[\001\001e\001\001~\001\001z\001\000\160\001\002\199\001\002\216\001\001A\001\000\164\001\001\004\001\000\166\001\000\167\001\000\168\001\000\169\001\001^\001\000\171\001\000\172\001\000\173\001\000\174\001\001{\001\000\176\001\000\177\001\002\219\001\001B\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\001\005\001\001_\001\000\187\001\001=\001\002\221\001\001>\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
+let windows1250_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007+\000\000\000\000\000\000\006\242\000\000\006\242\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\001\003\001\000\227@\145\160\160DD\160\160\001\001\004\001\000\165@\145\160\160EE\160\160\001\001\005\001\000\185@\145\160\160FF\160\160\001\001\006\001\000\198@\145\160\160GG\160\160\001\001\007\001\000\230@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\145\160\160NN\160\160\001\001\014\001\000\207@\145\160\160OO\160\160\001\001\015\001\000\239@\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\001\025\001\000\234@\145\160\160ZZ\160\160\001 \026\001\000\130\160\160\001\001\026\001\000\204@\145\160\160[[\160\160\001\001\027\001\000\236@\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\188@\145\160\160~~\160\160\001\001>\001\000\190@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\163@\145\160\160\000B\000B\160\160\001\001B\001\000\179@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\210@\145\160\160\000H\000H\160\160\001\001H\001\000\242@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\213@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\245@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001\001T\001\000\192@\145\160\160\000U\000U\160\160\001\001U\001\000\224@\144\160\000V\000V\144\160\000W\000W\145\160\160\000X\000X\160\160\001\001X\001\000\216@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\248@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\140@\145\160\160\000[\000[\160\160\001\001[\001\000\156@\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\141@\145\160\160\000e\000e\160\160\001\001e\001\000\157@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\00
0t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\143@\145\160\160\000z\000z\160\160\001\001z\001\000\159@\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@\144\160\001\000\187\001\000\187@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\161\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\14
4\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
+ let windows1251_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002D\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\002\001\004\003\001 \026\001\004S\001 \030\001 &\001 \001 !\001 \172\001 0\001\004\t\001 9\001\004\n\001\004\012\001\004\011\001\004\015\001\004R\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\004Y\001 :\001\004Z\001\004\\\001\004[\001\004_\001\000\160\001\004\014\001\004^\001\004\008\001\000\164\001\004\144\001\000\166\001\000\167\001\004\001\001\000\169\001\004\004\001\000\171\001\000\172\001\000\173\001\000\174\001\004\007\001\000\176\001\000\177\001\004\006\001\004V\001\004\145\001\000\181\001\000\182\001\000\183\001\004Q\001!\022\001\004T\001\000\187\001\004X\001\004\005\001\004U\001\004W\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O" 0 : int array);;
+let windows1251_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\166\000\000\000\000\000\000\007\129\000\000\007\129\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\168@\145\160\160BB\160\160\001\004\002\001\000\128@\145\160\160CC\160\160\001\004\003\001\000\129@\145\160\160DD\160\160\001\004\004\001\000\170@\145\160\160EE\160\160\001\004\005\001\000\189@\145\160\160FF\160\160\001\004\006\001\000\178@\145\160\160GG\160\160\001\004\007\001\000\175@\145\160\160HH\160\160\001\004\008\001\000\163@\145\160\160II\160\160\001\004\t\001\000\138@\145\160\160JJ\160\160\001\004\n\001\000\140@\145\160\160KK\160\160\001\004\011\001\000\142@\145\160\160LL\160\160\001\004\012\001\000\141@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\161@\145\160\160OO\160\160\001\004\015\001\000\143@\145\160\160PP\160\160\001\004\016\001\000\192@\145\160\160QQ\160\160\001\004\017\001\000\193@\145\160\160RR\160\160\001\004\018\001\000\194@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\004\019\001\000\195@\145\160\160TT\160\160\001 \020\001\000\151\160\160\001\004\020\001\000\196@\145\160\160UU\160\160\001\004\021\001\000\197@\145\160\160VV\160\160\001!\022\001\000\185\160\160\001\004\022\001\000\198@\145\160\160WW\160\160\001\004\023\001\000\199@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\004\024\001\000\200@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\004\025\001\000\201@\145\160\160ZZ\160\160\001 \026\001\000\130\160\160\001\004\026\001\000\202@\145\160\160[[\160\160\001\004\027\001\000\203@\145\160\160\\\\\160\160\001 \028\001\000\147\160\160\001\004\028\001\000\204@\145\160\160]]\160\160\001 \029\001\000\148\160\160\001\004\029\001\000\205@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\004\030\001\000\206@\145\160\160__\160\160\001\004\031\001\000\207@\145\160\160``\160\160\001 \001\000\134\160\160\001\004 \001\000\208@\145\160\160aa\160\160\001 
!\001\000\135\160\160\001\004!\001\000\209@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\004\"\001\000\210@\145\160\160cc\160\160\001\004#\001\000\211@\145\160\160dd\160\160\001\004$\001\000\212@\145\160\160ee\160\160\001\004%\001\000\213@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\004&\001\000\214@\145\160\160gg\160\160\001\004'\001\000\215@\145\160\160hh\160\160\001\004(\001\000\216@\145\160\160ii\160\160\001\004)\001\000\217@\145\160\160jj\160\160\001\004*\001\000\218@\145\160\160kk\160\160\001\004+\001\000\219@\145\160\160ll\160\160\001\004,\001\000\220@\145\160\160mm\160\160\001\004-\001\000\221@\145\160\160nn\160\160\001\004.\001\000\222@\145\160\160oo\160\160\001\004/\001\000\223@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0040\001\000\224@\145\160\160qq\160\160\001\0041\001\000\225@\145\160\160rr\160\160\001\0042\001\000\226@\145\160\160ss\160\160\001\0043\001\000\227@\145\160\160tt\160\160\001\0044\001\000\228@\145\160\160uu\160\160\001\0045\001\000\229@\145\160\160vv\160\160\001\0046\001\000\230@\145\160\160ww\160\160\001\0047\001\000\231@\145\160\160xx\160\160\001\0048\001\000\232@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0049\001\000\233@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\004:\001\000\234@\145\160\160{{\160\160\001\004;\001\000\235@\145\160\160||\160\160\001\004<\001\000\236@\145\160\160}}\160\160\001\004=\001\000\237@\145\160\160~~\160\160\001\004>\001\000\238@\145\160\160\127\127\160\160\001\004?\001\000\239@\145\160\160\000@\000@\160\160\001\004@\001\000\240@\145\160\160\000A\000A\160\160\001\004A\001\000\241@\145\160\160\000B\000B\160\160\001\004B\001\000\242@\145\160\160\000C\000C\160\160\001\004C\001\000\243@\145\160\160\000D\000D\160\160\001\004D\001\000\244@\145\160\160\000E\000E\160\160\001\004E\001\000\245@\145\160\160\000F\000F\160\160\001\004F\001\000\246@\145\160\160\000G\000G\160\160\001\004G\001\000\247@\145\160\160\000H\000H\160\160\001\004H\001\000\248@\145\160\160\000I\000I\160\160\001\004I\001\000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\250@\145\160\160\000K\000K\160\160\001\004K\001\000\251@\145\160\160\000L\000L\160\160\001\004L\001\000\252@\145\160\160\000M\000M\160\160\001\004M\001\000\253@\145\160\160\000N\000N\160\160\001\004N\001\000\254@\145\160\160\000O\000O\160\160\001\004O\001\000\255@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\184@\145\160\160\000R\000R\160\160\001\004R\001\000\144@\145\160\160\000S\000S\160\160\001\004S\001\000\131@\145\160\160\000T\000T\160\160\001\004T\001\000\186@\145\160\160\000U\000U\160\160\001\004U\001\000\190@\145\160\160\000V\000V\160\160\001\004V\001\000\179@\145\160\160\000W\000W\160\160\001\004W\001\000\191@\145\160\160\000X\000X\160\160\001\004X\001\000\188@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\154@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\156@\145\160\160\000[\000[\160\160\001\004[\001\000\158@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\157@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\162@\145\160\160\000_\000_\160\160\001\004_\001\000\159@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\1
60\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@\144\160\001\004\144\001\000\165\144\160\001\004\145\001\000\180@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\136\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@@@\144\160\001\000\187\001\000\187@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1252_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\001\001}\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\001\001~\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
+let windows1252_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\
001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let windows1253_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\000\255\001 0\000\255\001 9\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\000\255\000\255\000\255\001\000\160\001\003\133\001\003\134\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\000\255\001\000\171\001\000\172\001\000\173\001\000\174\001 \021\001\000\176\001\000\177\001\000\178\001\000\179\001\003\132\001\000\181\001\000\182\001\000\183\001\003\136\001\003\137\001\003\138\001\000\187\001\003\140\001\000\189\001\003\142\001\003\143\001\003\144\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\000\255\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\001\003\172\001\003\173\001\003\174\001\003\175\001\003\176\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\194\001\003\195\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001\003\201\001\003\202\001\003\203\001\003\204\001\003\205\001\003\206\000\255" 0 : int array);;
+let windows1253_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\130\000\000\000\000\000\000\006F\000\000\006F\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@\144\160\001\003\132\001\000\180\144\160\001\003\133\001\000\161\144\160\001\003\134\001\000\162@\144\160\001\003\136\001\000\184\144\160\001\003\137\001\000\185\144\160\001\003\138\001\000\186@\144\160\001\003\140\001\000\188@\144\160\001\003\142\001\000\190\144\160\001\003\143\001\000\191\144\160\001\003\144\001\000\192\144\160\001\003\145\001\000\193\145\160\160\001\001\146\001\000\131\160\160\001\003\146\001\000\194@\144\160\001\003\147\001\000\195\144\160\001\003\148\001\000\196\144\160\001\003\149\001\000\197\144\160\001\003\150\001\000\198\144\160\001\003\151\001\000\199\144\160\001\003\152\001\000\200\144\160\001\003\153\001\000\201\144\160\001\003\154\001\000\202\144\160\001\003\155\001\000\203\144\160\001\003\156\001\000\204\144\160\001\003\157\0
01\000\205\144\160\001\003\158\001\000\206\144\160\001\003\159\001\000\207\145\160\160\001\000\160\001\000\160\160\160\001\003\160\001\000\208@\144\160\001\003\161\001\000\209@\145\160\160\001\000\163\001\000\163\160\160\001\003\163\001\000\211@\145\160\160\001\000\164\001\000\164\160\160\001\003\164\001\000\212@\145\160\160\001\000\165\001\000\165\160\160\001\003\165\001\000\213@\145\160\160\001\000\166\001\000\166\160\160\001\003\166\001\000\214@\145\160\160\001\000\167\001\000\167\160\160\001\003\167\001\000\215@\145\160\160\001\000\168\001\000\168\160\160\001\003\168\001\000\216@\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\217@\144\160\001\003\170\001\000\218\145\160\160\001\000\171\001\000\171\160\160\001\003\171\001\000\219@\145\160\160\001 \172\001\000\128\160\160\001\000\172\001\000\172\160\160\001\003\172\001\000\220@\145\160\160\001\000\173\001\000\173\160\160\001\003\173\001\000\221@\145\160\160\001\000\174\001\000\174\160\160\001\003\174\001\000\222@\144\160\001\003\175\001\000\223\145\160\160\001\000\176\001\000\176\160\160\001\003\176\001\000\224@\145\160\160\001\000\177\001\000\177\160\160\001\003\177\001\000\225@\145\160\160\001\000\178\001\000\178\160\160\001\003\178\001\000\226@\145\160\160\001\000\179\001\000\179\160\160\001\003\179\001\000\227@\144\160\001\003\180\001\000\228\145\160\160\001\000\181\001\000\181\160\160\001\003\181\001\000\229@\145\160\160\001\000\182\001\000\182\160\160\001\003\182\001\000\230@\145\160\160\001\000\183\001\000\183\160\160\001\003\183\001\000\231@\144\160\001\003\184\001\000\232\144\160\001\003\185\001\000\233\144\160\001\003\186\001\000\234\145\160\160\001\000\187\001\000\187\160\160\001\003\187\001\000\235@\144\160\001\003\188\001\000\236\145\160\160\001\000\189\001\000\189\160\160\001\003\189\001\000\237@\144\160\001\003\190\001\000\238\144\160\001\003\191\001\000\239\144\160\001\003\192\001\000\240\144\160\001\003\193\001\000\241\144\160\001\003\194\001\000\242\144\160\001\003\195\001\000\24
3\144\160\001\003\196\001\000\244\144\160\001\003\197\001\000\245\144\160\001\003\198\001\000\246\144\160\001\003\199\001\000\247\144\160\001\003\200\001\000\248\144\160\001\003\201\001\000\249\144\160\001\003\202\001\000\250\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\252\144\160\001\003\205\001\000\253\144\160\001\003\206\001\000\254@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1254_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\000\255\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001\030\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\0010\001\001^\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001\031\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\0011\001\001_\001\000\255" 0 : int array);;
+let windows1254_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\168\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\2
34\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let windows1255_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002.\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\000\255\001 9\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\000\255\000\255\000\255\000\255\001\000\160\001\000\161\001\000\162\001\000\163\001 \170\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\215\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\247\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\005\176\001\005\177\001\005\178\001\005\179\001\005\180\001\005\181\001\005\182\001\005\183\001\005\184\001\005\185\000\255\001\005\187\001\005\188\001\005\189\001\005\190\001\005\191\001\005\192\001\005\193\001\005\194\001\005\195\001\005\240\001\005\241\001\005\242\001\005\243\001\005\244\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\000\255\001 \014\001 \015\000\255" 0 : int array);;
+let windows1255_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006N\000\000\000\000\000\000\006\027\000\000\006\027\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001 \170\001\000\164\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\145\160\160\001\000\176\001\000\176\160\160\001\005\176\001\000\192@\145\160\160\001\000\177\001\000\177\160\160\001\005\177\001\000\193@\145\160\160\001\000\178\001\000\178\160\160\001\005\178\001\000\194@\145\160\160\001\000\179\001\000\179\160\160\001\005\179\001\000\195@\145\160\160\001\000\180\001\000\180\160\160\001\005\180\001\000\196@\145\160\160\001\000\181\001\000\181\160\160\001\005\181\001\000\197@\145\160\160\001\000\182\001\000\182\160\160\001\005\182\001\000\198@\145\160\160\001\000\183\001\000\183\160\160\001\005\183\001\000\199@\145\160\160\001\000\184\001\000\184\160\160\001\005\184\001\000\200@\145\160\160\001\000\185\001\000\185\160\160\001\005\185\001\000\201@@\145\160\160\001\000\187\001\000\187\160\160\001\005\187\001\000\203@\145\160\160\001\000\188\001\000\188\160\160\001\005\188\001\000\204@\145\160\160\001\000\189\001\000\189\160\160\001\005\189\001\000\205@\145\160\160\001\000\190\001\000\190\160\160\001\005\190\001\000\206@\145\160\160\001\000\191\001\000\191\160\160\001\005\191\001\000\207@\144\160\001\005\192\001\000\208\144\160\001\005\193\001\000\209\144\160\001\005\194\001\000\210\144\160\001\005\195\001\000\211@@\144\160\001\002\198\001\000\136@@@@@@@@@\144\160\001\005\208\001\000\224\144\160\001\005\209\001\000\225\144\160\001\005\210\001\000\226\144\160\001\005\211\001\000\227\144\160\001\005\212\001\000\228\144\160\001\005\213\001\000\229\144\160\001\005\214\001\000\230\145\160\160\001\000\215\001\000\170\160\160\001\005\215\001\000\231@\144\160\001\005\216\001\000\232\144\160\001\005\217\001\000\233\144\160\001\005\218\001\000\234\144\160\001\005\219\001\000\235\145\160\160\001\002\220\001\000\152\160\160\001\005\220\001\000\236@\144\160\001\005\221\001\000\237\144\160\001\005\222\001\000\238\144\160\001\005\223\001\000\239\144\160\001\005\224\001\000\240\144\160\001\005\225\001\000\2
41\144\160\001\005\226\001\000\242\144\160\001\005\227\001\000\243\144\160\001\005\228\001\000\244\144\160\001\005\229\001\000\245\144\160\001\005\230\001\000\246\144\160\001\005\231\001\000\247\144\160\001\005\232\001\000\248\144\160\001\005\233\001\000\249\144\160\001\005\234\001\000\250@@@@@\144\160\001\005\240\001\000\212\144\160\001\005\241\001\000\213\144\160\001\005\242\001\000\214\144\160\001\005\243\001\000\215\144\160\001\005\244\001\000\216@@\144\160\001\000\247\001\000\186@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1256_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\001\006~\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\006y\001 9\001\001R\001\006\134\001\006\152\001\006\136\001\006\175\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\006\169\001!\"\001\006\145\001 :\001\001S\001 \012\001 \013\001\006\186\001\000\160\001\006\012\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\006\190\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\006\027\001\000\187\001\000\188\001\000\189\001\000\190\001\006\031\001\006\193\001\006!\001\006\"\001\006#\001\006$\001\006%\001\006&\001\006'\001\006(\001\006)\001\006*\001\006+\001\006,\001\006-\001\006.\001\006/\001\0060\001\0061\001\0062\001\0063\001\0064\001\0065\001\0066\001\000\215\001\0067\001\0068\001\0069\001\006:\001\006@\001\006A\001\006B\001\006C\001\000\224\001\006D\001\000\226\001\006E\001\006F\001\006G\001\006H\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\006I\001\006J\001\000\238\001\000\239\001\006K\001\006L\001\006M\001\006N\001\000\244\001\006O\001\006P\001\000\247\001\006Q\001\000\249\001\006R\001\000\251\001\000\252\001 \014\001 \015\001\006\210" 0 : int array);;
+let windows1256_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007W\000\000\000\000\000\000\007\022\000\000\007\022\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001 \012\001\000\157\160\160\001\006\012\001\000\161@\145\160\160MM\160\160\001 \013\001\000\158@\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\145\160\160[[\160\160\001\006\027\001\000\186@\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\145\160\160__\160\160\001\006\031\001\000\191@\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001 
0\001\000\137\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\216@\145\160\160xx\160\160\001\0068\001\000\217@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0069\001\000\218@\145\160\160zz\160\160\001 :\001\000\155\160\160\001\006:\001\000\219@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\220@\145\160\160\000A\000A\160\160\001\006A\001\000\221@\145\160\160\000B\000B\160\160\001\006B\001\000\222@\145\160\160\000C\000C\160\160\001\006C\001\000\223@\145\160\160\000D\000D\160\160\001\006D\001\000\225@\145\160\160\000E\000E\160\160\001\006E\001\000\227@\145\160\160\000F\000F\160\160\001\006F\001\000\228@\145\160\160\000G\000G\160\160\001\006G\001\000\229@\145\160\160\000H\000H\160\160\001\006H\001\000\230@\145\160\160\000I\000I\160\160\001\006I\001\000\236@\145\160\160\000J\000J\160\160\001\006J\001\000\237@\145\160\160\000K\000K\160\160\001\006K\001\000\240@\145\160\160\000L\000L\160\160\001\006L\001\000\241@\145\160\160\000M\000M\160\160\001\006M\001\000\242@\145\160\160\000N\000N\160\160\001\006N\001\000\243@\145\160\160\000O\000O\160\160\001\006O\001\000\245@\145\160\160\000P\000P\160\160\001\006P\001\000\246@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\248@\145\160\160\000R\000R\160\160\001\001R\001\000\140\160\160\001\006R\001\000\250@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\
000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\006y\001\000\138@\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\145\160\160\000~\000~\160\160\001\006~\001\000\129@\144\160\000\127\000\127@@@@@@\144\160\001\006\134\001\000\141@\144\160\001\006\136\001\000\143@@@@@@@@\144\160\001\006\145\001\000\154\144\160\001\001\146\001\000\131@@@@@\144\160\001\006\152\001\000\142@@@@@@@\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\145\160\160\001\006\169\001\000\152\160\160\001\000\169\001\000\169@@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\145\160\160\001\006\175\001\000\144\160\160\001\000\175\001\000\175@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\006\186\001\000\159\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\145\160\160\001\006\190\001\000\170\160\160\001\000\190\001\000\190@@@\144\160\001\006\193\001\000\192@@@@\144\160\001\002\198\001\000\136@@@@@@@@@@@\144\160\001\006\210\001\000\255@@@@\144\160\001\000\215\001\000\215@@@@@@@@\144\160\001\000\224\001\000\224@\144\160\001\000\226\001\000\226@@@@\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235@@\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@@@@\144\160\001\000\244\001\000\244@@\144\160\001\000\247\001\000\247@\144\160\001\000\249\001\000\249@\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1257_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0029\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001 \001 !\000\255\001 0\000\255\001 9\000\255\001\000\168\001\002\199\001\000\184\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\001\000\175\001\002\219\000\255\001\000\160\000\255\001\000\162\001\000\163\001\000\164\000\255\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001\002\217" 0 : int array);;
+let windows1257_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\234\000\000\000\000\000\000\006\186\000\000\006\186\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\194@\145\160\160AA\160\160\001\001\001\001\000\226@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\192@\145\160\160EE\160\160\001\001\005\001\000\224@\145\160\160FF\160\160\001\001\006\001\000\195@\145\160\160GG\160\160\001\001\007\001\000\227@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\144\160PP\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\199@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\001\019\001\000\231@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\203@\145\160\160WW\160\160\001\001\023\001\000\235@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\001\024\001\000\198@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\001\025\001\000\230@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\145\160\160yy\160\160\001 
9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\217@\145\160\160\000B\000B\160\160\001\001B\001\000\249@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\145\160\160\000E\000E\160\160\001\001E\001\000\210@\145\160\160\000F\000F\160\160\001\001F\001\000\242@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\212@\145\160\160\000M\000M\160\160\001\001M\001\000\244@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\170@\145\160\160\000W\000W\160\160\001\001W\001\000\186@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\218@\145\160\160\000[\000[\160\160\001\001[\001\000\250@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\16
0\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~\001\000\254@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\141\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\157\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\143\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\175\144\160\001\002\199\001\000\142@\144\160\001\000\201\001\000\201@@@@@@@@@\144\160\001\000\211\001\000\211@\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\168\144\160\001\002\217\001\000\255@\144\160\001\002\219\001\000\158\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@@@@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\191@@\144\160\001\000\233\001\000\233@@@@@@@@@\144\160\001\000\243\001\000\243@\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\184@@@\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1258_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\000\255\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\001\001S\000\255\000\255\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\001\002\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\003\000\001\000\205\001\000\206\001\000\207\001\001\016\001\000\209\001\003\t\001\000\211\001\000\212\001\001\160\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\001\175\001\003\003\001\000\223\001\000\224\001\000\225\001\000\226\001\001\003\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\003\001\001\000\237\001\000\238\001\000\239\001\001\017\001\000\241\001\003#\001\000\243\001\000\244\001\001\161\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\001\176\001 \171\001\000\255" 0 : int array);;
+let windows1258_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006^\000\000\006^\008\000\004\000\000\145\160\160@@\160\160\001\003\000\001\000\204@\145\160\160AA\160\160\001\003\001\001\000\236@\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\003\003\001\000\222\160\160\001\001\003\001\000\227@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\145\160\160II\160\160\001\003\t\001\000\210@\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\145\160\160cc\160\160\001\003#\001\000\242@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\145\160\160\001\000\160\001\000\160\160\160\001\001\160\001\000\213@\145\160\160\001\000\161\001\000\161\160\160\001\001\161\001\000\245@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\145\160\160\001\000\171\001\000\171\160\160\001 \171\001\000\254@\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\145\160\160\001\000\175\001\000\175\160\160\001\001\175\001\000\221@\145\160\160\001\000\176\001\000\176\160\160\001\001\176\001\000\253@\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\1
44\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+(* Register the conversion tables with Netmappings: for every encoding
+ * tag, the *_to_unicode value is added to Netmappings.to_unicode and the
+ * *_from_unicode value to Netmappings.from_unicode.  The whole run is a
+ * single sequence of statements closed by the final unit expression.
+ * NOTE(review): the tables are presumably all lazy, unmarshalled values
+ * like windows1258_*; most of their definitions are outside this
+ * excerpt -- confirm.
+ *)
+ Hashtbl.add Netmappings.to_unicode `Enc_windows1258 windows1258_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1258 windows1258_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1257 windows1257_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1257 windows1257_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1256 windows1256_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1256 windows1256_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1255 windows1255_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1255 windows1255_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1254 windows1254_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1254 windows1254_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1253 windows1253_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1253 windows1253_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1252 windows1252_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1252 windows1252_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1251 windows1251_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1251 windows1251_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1250 windows1250_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1250 windows1250_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_macroman macroman_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_macroman macroman_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_koi8r koi8r_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_koi8r koi8r_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_jis0201 jis0201_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_jis0201 jis0201_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_adobe_zapf_dingbats_encoding adobe_zapf_dingbats_encoding_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_adobe_zapf_dingbats_encoding adobe_zapf_dingbats_encoding_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_adobe_symbol_encoding adobe_symbol_encoding_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_adobe_symbol_encoding adobe_symbol_encoding_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_adobe_standard_encoding adobe_standard_encoding_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_adobe_standard_encoding adobe_standard_encoding_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp875 cp875_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp875 cp875_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp874 cp874_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp874 cp874_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp869 cp869_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp869 cp869_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp866 cp866_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp866 cp866_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp865 cp865_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp865 cp865_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp864 cp864_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp864 cp864_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp863 cp863_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp863 cp863_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp862 cp862_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp862 cp862_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp861 cp861_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp861 cp861_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp860 cp860_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp860 cp860_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp857 cp857_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp857 cp857_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp856 cp856_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp856 cp856_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp855 cp855_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp855 cp855_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp852 cp852_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp852 cp852_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp850 cp850_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp850 cp850_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp775 cp775_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp775 cp775_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp737 cp737_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp737 cp737_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp500 cp500_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp500 cp500_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp437 cp437_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp437 cp437_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp424 cp424_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp424 cp424_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp1026 cp1026_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp1026 cp1026_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp1006 cp1006_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp1006 cp1006_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp037 cp037_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp037 cp037_from_unicode;
+();;
diff --git a/helm/DEVEL/pxp/netstring/netstream.ml b/helm/DEVEL/pxp/netstring/netstream.ml
new file mode 100644
index 000000000..76c2e3a4c
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstream.ml
@@ -0,0 +1,162 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* State of a netstream.
+ *   s_channel        : channel the blocks are read from
+ *   s_maxlength      : optional upper bound for the total number of
+ *                      bytes read from the channel
+ *   s_blocksize      : maximum number of bytes fetched per read step
+ *   s_current_length : number of bytes appended to the window so far
+ *   s_at_eos         : set when a read step delivered fewer than
+ *                      s_blocksize bytes
+ *   s_win_pos        : number of bytes already discarded by 'move'
+ *   s_win_len        : number of bytes currently held in the window
+ *   s_netbuf         : buffer holding the window contents
+ *   s_iobuf          : scratch buffer filled by 'input' before its
+ *                      contents are appended to s_netbuf
+ *)
+type t =
+ { s_channel : in_channel;
+ s_maxlength : int option;
+ s_blocksize : int;
+ mutable s_current_length : int;
+ mutable s_at_eos : bool;
+ mutable s_win_pos : int;
+ mutable s_win_len : int;
+ s_netbuf : Netbuffer.t;
+ s_iobuf : string;
+ }
+;;
+
+
+(* Debugging aid: writes the bookkeeping fields of [s] and the escaped
+ * window contents to stdout, framed by a header line containing [text],
+ * and flushes stdout afterwards.
+ *)
+let dump s text =
+  let buf = s.s_netbuf in
+  let header = "*** NETSTREAM DUMP " ^ text ^ "\n" in
+  let used = Netbuffer.length buf in
+  (* Length of the underlying storage, as opposed to the used part. *)
+  let storage = String.length (Netbuffer.unsafe_buffer buf) in
+  let shown = String.escaped (Netbuffer.contents buf) in
+  print_string header;
+  Printf.printf "current_length=%d at_eos=%b win_pos=%d win_len=%d\n"
+    s.s_current_length s.s_at_eos s.s_win_pos s.s_win_len;
+  Printf.printf "netbuffer_length=%d netbuffer_size=%d\n" used storage;
+  Printf.printf "netbuffer=\"%s\"\n" shown;
+  print_string "*** ---------------\n";
+  flush stdout
+;;
+
+
+(* Appends one more block to the window of [s], unless the end of the
+ * stream has already been seen.  A block is s_blocksize bytes, or fewer
+ * if s_maxlength leaves less room.  When fewer than s_blocksize bytes
+ * were obtained, the stream is marked as being at EOS.
+ *)
+let want_another_block s =
+  if not s.s_at_eos then begin
+    (* Number of bytes this step is allowed to read. *)
+    let quota =
+      match s.s_maxlength with
+          Some limit -> min (limit - s.s_current_length) s.s_blocksize
+        | None -> s.s_blocksize
+    in
+    (* 'input' may return less than requested, so keep reading until the
+     * quota is filled or 'input' reports EOF by returning 0.
+     *)
+    let filled = ref 0 in
+    let eof = ref false in
+    while not !eof && !filled < quota do
+      let got = input s.s_channel s.s_iobuf !filled (quota - !filled) in
+      if got > 0 then
+        filled := !filled + got
+      else
+        eof := true
+    done;
+    let n = !filled in
+    Netbuffer.add_sub_string s.s_netbuf s.s_iobuf 0 n;
+    s.s_win_len <- s.s_win_len + n;
+    s.s_current_length <- s.s_current_length + n;
+    (* A short block means that EOS has been reached. *)
+    s.s_at_eos <- n < s.s_blocksize
+  end
+;;
+
+
+(* Loads further blocks until the window of [s] holds at least [n] bytes
+ * or the end of the stream is reached.
+ *)
+let want s n =
+  let rec grow () =
+    if not s.s_at_eos && s.s_win_len < n then begin
+      want_another_block s;
+      grow ()
+    end
+  in
+  grow ()
+;;
+
+
+(* Ensures the standard window size: twice the block size, if the stream
+ * is long enough.
+ *)
+let want_minimum s =
+  let two_blocks = 2 * s.s_blocksize in
+  want s two_blocks
+;;
+
+
+(* Discards the first [n] bytes of the window of [s], adjusts the window
+ * position and length accordingly, and then refills the window up to
+ * its standard size.
+ *)
+let move s n =
+  (* Drop the bytes first: if this fails, the counters stay untouched. *)
+  Netbuffer.delete s.s_netbuf 0 n;
+  let new_pos = s.s_win_pos + n in
+  let new_len = s.s_win_len - n in
+  s.s_win_pos <- new_pos;
+  s.s_win_len <- new_len;
+  want_minimum s
+;;
+
+
+(* Creates a netstream reading from [ch] in steps of [blocksize] bytes,
+ * reading at most [maxlength] bytes in total if that is [Some n].  The
+ * initial window is loaded before the stream is returned.
+ *)
+let create_from_channel ch maxlength blocksize =
+  let scratch = String.create blocksize in
+  let window = Netbuffer.create (2*blocksize) in
+  let stream =
+    { s_channel = ch;
+      s_maxlength = maxlength;
+      s_blocksize = blocksize;
+      s_current_length = 0;
+      s_at_eos = false;
+      s_win_pos = 0;
+      s_win_len = 0;
+      s_netbuf = window;
+      s_iobuf = scratch;
+    }
+  in
+  want_minimum stream;
+  stream
+;;
+
+
+(* Creates a netstream whose window is a copy of [str].  The stream is
+ * already at EOS, so nothing is ever read from the channel; stdin only
+ * fills the channel field.
+ *)
+let create_from_string str =
+  let len = String.length str in
+  let window = Netbuffer.create len in
+  Netbuffer.add_string window str;
+  { s_channel = stdin;
+    s_maxlength = None;
+    s_blocksize = len;
+    s_current_length = len;
+    s_at_eos = true;
+    s_win_pos = 0;
+    s_win_len = len;
+    s_netbuf = window;
+    s_iobuf = "";
+  }
+;;
+
+
+(* The block size the stream was created with. *)
+let block_size s = s.s_blocksize;;
+
+(* The number of bytes appended to the window so far. *)
+let current_length s = s.s_current_length;;
+
+(* Whether the end of the stream has been detected. *)
+let at_eos s = s.s_at_eos;;
+
+(* The number of bytes discarded from the front of the window by 'move'. *)
+let window_position s = s.s_win_pos;;
+
+(* The number of bytes currently in the window. *)
+let window_length s = s.s_win_len;;
+
+(* The buffer holding the window; it is returned as is, not copied. *)
+let window s = s.s_netbuf;;
+
+(* Toploop printer: writes a one-line summary of [s] (window position,
+ * window length, total number of bytes read so far, EOS flag) to the
+ * standard formatter.
+ * The format string must consume exactly the four arguments below
+ * (three ints and a bool); an empty format string does not type-check.
+ *)
+let print_stream s =
+  Format.printf
+    "<NETSTREAM window:%d/%d total_length:%d eof=%b>"
+    s.s_win_pos
+    s.s_win_len
+    s.s_current_length
+    s.s_at_eos
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstream.mli b/helm/DEVEL/pxp/netstring/netstream.mli
new file mode 100644
index 000000000..7cb185712
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstream.mli
@@ -0,0 +1,118 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* A netstream is an input channel that is read block by block. The
+ * fragment of the channel currently loaded into memory is called the
+ * current window of the netstream.
+ *
+ * PICTURE:
+ *
+ * 0                  window_position     current_length          EOS
+ * +------------------+-------------------+--------------------------+
+ *                    ====================
+ *                    The current window
+ *
+ * window_length = current_length - window_position
+ *
+ * The window is automatically kept at a certain minimum length. If possible,
+ * the window is at least twice the block size long, where a "block" is
+ * the amount of data that is read from the input channel in one step.
+ *
+ * (The idea is that you choose as block size the number of bytes you want
+ * to analyze at once, and which must be loaded into memory. You can start
+ * your analysis at window_position and proceed until window_position +
+ * blocksize without having to check whether your window is large enough.
+ * Only when the first blocksize bytes of the window are already processed,
+ * the window must be enlarged by loading the next block.)
+ *
+ * If you want the window to become larger, you can call 'want' (to
+ * enlarge the window to a certain size) or 'want_another_block' (to load
+ * just another block from the input channel). Note that this affects only
+ * the current window and not future windows.
+ *
+ * If you do not need the first n bytes of the window anymore, you can
+ * call 'move' to move the beginning of the window by n bytes. If the
+ * window becomes too small after this operation, it is enlarged until
+ * it has twice the block size or until it reaches EOS.
+ *)
+
+type t
+
+val create_from_channel : in_channel -> int option -> int -> t
+ (* create_from_channel ch maxlength blocksize:
+ * The new netstream reads from the channel 'ch'. If maxlength = None,
+ * the channel is read until EOF. If maxlength = Some n, at most n bytes
+ * are read; i.e. the netstream reads until n bytes have been read or
+ * until EOF has been reached, whichever comes first. The blocksize
+ * specifies the number of bytes to read at once.
+ *)
+
+val create_from_string : string -> t
+ (* Creates a new netstream from a string. The initial window of this
+ * netstream is a copy of the passed string.
+ *)
+
+val block_size : t -> int
+ (* Returns the (immutable) block size. *)
+
+val current_length : t -> int
+ (* Returns the number of bytes read so far. *)
+
+val at_eos : t -> bool
+ (* True iff EOS (end of stream) is reached, i.e. the last byte of the
+ * window is the last byte of the stream.
+ *)
+
+val window_position : t -> int
+ (* Returns the absolute position of the current window. *)
+
+val window_length : t -> int
+ (* Returns the length of the current window. *)
+
+val window : t -> Netbuffer.t
+ (* Returns the current window. *)
+
+val move : t -> int -> unit
+ (* move s n:
+ * Moves the window: The first n bytes of the current window are
+ * discarded. If the window would become smaller than twice the
+ * blocksize and if the end of the stream is not yet reached, another
+ * block is read from the input channel and appended to the window.
+ *
+ * PRECONDITION:
+ * - n <= window_length
+ *)
+
+val want : t -> int -> unit
+ (* want s n:
+ * If the window is smaller than n bytes, an attempt is made to enlarge
+ * the window such that it is at least n bytes long. The enlargement
+ * is not possible if the stream is not long enough; in this case
+ * the window becomes as large as possible.
+ *)
+
+val want_another_block : t -> unit
+ (* Enlarges the window by another block (if possible i.e. if the stream
+ * is long enough).
+ *)
+
+val print_stream : t -> unit
+ (* Printer for netstreams, intended for the toploop: Netstring_top
+ * installs it with #install_printer. Output goes to the standard
+ * formatter of the Format module.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring.cma b/helm/DEVEL/pxp/netstring/netstring.cma
new file mode 100644
index 000000000..1cf66b502
Binary files /dev/null and b/helm/DEVEL/pxp/netstring/netstring.cma differ
diff --git a/helm/DEVEL/pxp/netstring/netstring.cmxa b/helm/DEVEL/pxp/netstring/netstring.cmxa
new file mode 100644
index 000000000..f95c00849
Binary files /dev/null and b/helm/DEVEL/pxp/netstring/netstring.cmxa differ
diff --git a/helm/DEVEL/pxp/netstring/netstring_mt.ml b/helm/DEVEL/pxp/netstring/netstring_mt.ml
new file mode 100644
index 000000000..96576f1ef
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstring_mt.ml
@@ -0,0 +1,37 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Initialize multi-threading mode: *)
+
+(* One mutex per module that offers an init_mt hook. *)
+let str_mutex = Mutex.create();;
+let cgi_mutex = Mutex.create();;
+let mappings_mutex = Mutex.create();;
+
+(* Executed when this module is initialized: every init_mt receives a
+ * pair of closures that lock and unlock the mutex created for that
+ * module above.
+ *)
+Netstring_str.init_mt
+ (fun () -> Mutex.lock str_mutex)
+ (fun () -> Mutex.unlock str_mutex);
+Cgi.init_mt
+ (fun () -> Mutex.lock cgi_mutex)
+ (fun () -> Mutex.unlock cgi_mutex);
+Netmappings.init_mt
+ (fun () -> Mutex.lock mappings_mutex)
+ (fun () -> Mutex.unlock mappings_mutex)
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/29 00:45:42 gerd
+ * Initializing Netmappings, too
+ *
+ * Revision 1.1 2000/06/25 21:15:27 gerd
+ * Initial revision
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_mt.mli b/helm/DEVEL/pxp/netstring/netstring_mt.mli
new file mode 100644
index 000000000..c224b2bba
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstring_mt.mli
@@ -0,0 +1,25 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module initializes the multi-threading mode of
+ * Netstring. You must link it with every application that
+ * uses multi-threading.
+ * PITFALL: Link this module _directly_ with the executable,
+ * _don't_ put this module into a cma archive! This would not work!
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/06/25 21:15:27 gerd
+ * Initial revision
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_str.ml b/helm/DEVEL/pxp/netstring/netstring_str.ml
new file mode 100644
index 000000000..7353719dc
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstring_str.ml
@@ -0,0 +1,241 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Hooks called by 'protect' before and after every protected call.
+ * Both do nothing until init_mt installs real functions.
+ *)
+let lock = ref (fun () -> ());;
+let unlock = ref (fun () -> ());;
+
+(* init_mt new_lock new_unlock:
+ * Replaces the two hooks. Netstring_mt calls this with closures that
+ * lock and unlock a mutex.
+ *)
+let init_mt new_lock new_unlock =
+ lock := new_lock;
+ unlock := new_unlock
+;;
+
+(* protect f:
+ * Runs f() between calls of the lock and unlock hooks and returns its
+ * result. If f raises an exception, the unlock hook is called and the
+ * exception is raised again.
+ * The unlock hook is called exactly once on every path: the call for
+ * the successful case is placed outside the exception handler, so an
+ * exception raised by the unlock hook itself cannot trigger a second
+ * unlock.
+ *)
+let protect f =
+  !lock();
+  let r =
+    try
+      f()
+    with
+      x ->
+        !unlock();
+        raise x
+  in
+  !unlock();
+  r
+;;
+
+(* Regular expressions and split results are those of Str. *)
+type regexp = Str.regexp;;
+type split_result = Str.split_result = Text of string | Delim of string;;
+
+(* Snapshot of the outcome of a match, filled in by return_result:
+ *   pos       : the position argument passed to return_result
+ *   match_beg : start of the whole match, or -1 if not available
+ *   match_end : end of the whole match, or -1 if not available
+ *   group_beg : index g holds the start of group g+1, or -1
+ *   group_end : index g holds the end of group g+1, or -1
+ *)
+type result =
+ { pos : int;
+ match_beg : int;
+ match_end : int;
+ group_beg : int array;
+ group_end : int array;
+ }
+;;
+
+(* The following five functions call the Str function of the same name
+ * inside 'protect', i.e. between the lock and unlock hooks.
+ *)
+
+let regexp s =
+ protect
+ (fun () -> Str.regexp s)
+;;
+
+let regexp_case_fold s =
+ protect
+ (fun () -> Str.regexp_case_fold s)
+;;
+
+let quote s =
+ protect
+ (fun () -> Str.quote s)
+;;
+
+let regexp_string s =
+ protect
+ (fun () -> Str.regexp_string s)
+;;
+
+let regexp_string_case_fold s =
+ protect
+ (fun () -> Str.regexp_string_case_fold s)
+;;
+
+(* return_result pos n_groups:
+ * Copies the match information kept by Str (whole match and the groups
+ * 1 to n_groups) into a fresh 'result'. Offsets that are not available
+ * are stored as -1.
+ * Must be called while the lock is held, directly after the Str
+ * matching function, because Str keeps this information in global
+ * state.
+ *
+ * Str signals "group did not match" with Not_found, but "the regular
+ * expression has fewer groups than the requested number" with
+ * Invalid_argument. Both are mapped to -1; otherwise the default of
+ * 9 groups would fail for every pattern with fewer than 9 groups.
+ *)
+let return_result pos n_groups =
+  let offset_or_none get =
+    try get () with Not_found | Invalid_argument _ -> -1
+  in
+  let r =
+    { pos = pos;
+      match_beg = offset_or_none Str.match_beginning;
+      match_end = offset_or_none Str.match_end;
+      group_beg = Array.make n_groups (-1);
+      group_end = Array.make n_groups (-1);
+    }
+  in
+  for g = 0 to n_groups - 1 do
+    (* Array index g corresponds to Str group g+1. *)
+    r.group_beg.(g) <- offset_or_none (fun () -> Str.group_beginning (g+1));
+    r.group_end.(g) <- offset_or_none (fun () -> Str.group_end (g+1));
+  done;
+  r
+;;
+
+(* The matching functions run the Str function of the same name inside
+ * 'protect' and capture the match information with return_result before
+ * the lock is released. ~groups is the number of groups stored in the
+ * result (default 9).
+ *)
+
+(* Some result if pat matches s at pos, None otherwise. *)
+let string_match ?(groups = 9) ~pat s ~pos =
+ protect
+ (fun () ->
+ if Str.string_match pat s pos then
+ Some (return_result pos groups)
+ else
+ None
+ )
+;;
+
+(* Like string_match, but based on Str.string_partial_match. *)
+let string_partial_match ?(groups = 9) ~pat s ~pos =
+ protect
+ (fun () ->
+ if Str.string_partial_match pat s pos then
+ Some (return_result pos groups)
+ else
+ None
+ )
+;;
+
+(* Returns the value of Str.search_forward together with the result.
+ * An exception raised by Str.search_forward passes through 'protect'.
+ *)
+let search_forward ?(groups = 9) ~pat s ~pos =
+ protect
+ (fun () ->
+ let i = Str.search_forward pat s pos in
+ i, return_result pos groups
+ )
+;;
+
+(* Returns the value of Str.search_backward together with the result.
+ * An exception raised by Str.search_backward passes through 'protect'.
+ *)
+let search_backward ?(groups = 9) ~pat s ~pos =
+ protect
+ (fun () ->
+ let i = Str.search_backward pat s pos in
+ i, return_result pos groups
+ )
+;;
+
+(* matched_string result s:
+ * Returns the substring of s covered by the whole match recorded in
+ * result. s should be the string the match was performed on.
+ * Raises Not_found if the match offsets are not available.
+ * Uses || instead of the deprecated 'or' operator.
+ *)
+let matched_string result s =
+  if result.match_beg < 0 || result.match_end < 0 then raise Not_found;
+  String.sub s result.match_beg (result.match_end - result.match_beg)
+;;
+
+(* Start offset of the whole match; raises Not_found if it is not
+ * available.
+ *)
+let match_beginning result =
+  let start = result.match_beg in
+  if start < 0 then raise Not_found else start
+;;
+
+(* End offset of the whole match; raises Not_found if it is not
+ * available.
+ *)
+let match_end result =
+  let stop = result.match_end in
+  if stop < 0 then raise Not_found else stop
+;;
+
+(* matched_group result n s:
+ * Returns the substring of s matched by group n. Groups are numbered
+ * from 1; group n is stored at array index n-1.
+ * Raises Not_found if n is not in the range 1 .. (number of stored
+ * groups), or if the group did not match.
+ *
+ * The range check is done on the group number, not on the array index:
+ * n = 0 must not reach index -1, and the last stored group (n equal to
+ * the array length) is valid.
+ *)
+let matched_group result n s =
+  if n < 1 || n > Array.length result.group_beg then raise Not_found;
+  let gbeg = result.group_beg.(n-1) in
+  let gend = result.group_end.(n-1) in
+  if gbeg < 0 || gend < 0 then raise Not_found;
+  String.sub s gbeg (gend - gbeg)
+;;
+
+(* group_beginning result n:
+ * Returns the start offset of group n. Groups are numbered from 1;
+ * group n is stored at array index n-1.
+ * Raises Not_found if n is not in the range 1 .. (number of stored
+ * groups), or if the group did not match.
+ * The range check is done on the group number, so that n = 0 cannot
+ * reach index -1 and the last stored group is accepted.
+ *)
+let group_beginning result n =
+  if n < 1 || n > Array.length result.group_beg then raise Not_found;
+  let gbeg = result.group_beg.(n-1) in
+  if gbeg < 0 then raise Not_found else
+    gbeg
+;;
+
+(* group_end result n:
+ * Returns the end offset of group n. Groups are numbered from 1;
+ * group n is stored at array index n-1.
+ * Raises Not_found if n is not in the range 1 .. (number of stored
+ * groups), or if the group did not match.
+ * The range check is done on the group number, so that n = 0 cannot
+ * reach index -1 and the last stored group is accepted.
+ *)
+let group_end result n =
+  if n < 1 || n > Array.length result.group_end then raise Not_found;
+  let gend = result.group_end.(n-1) in
+  if gend < 0 then raise Not_found else
+    gend
+;;
+
+(* Str.global_replace, executed inside 'protect'. *)
+let global_replace ~pat ~templ s =
+ protect
+ (fun () ->
+ Str.global_replace pat templ s)
+;;
+
+(* Str.replace_first, executed inside 'protect'. *)
+let replace_first ~pat ~templ s =
+ protect
+ (fun () ->
+ Str.replace_first pat templ s)
+;;
+
+(* Both substitution functions wrap the user function: for every call
+ * made by Str, a result is captured with return_result (its pos field
+ * is always 0) and passed to subst together with the string.
+ * subst is called from within 'protect', i.e. between the lock and
+ * unlock hooks.
+ * NOTE(review): if the installed lock is not recursive, calling other
+ * functions of this module from subst would presumably block -- confirm.
+ *)
+
+let global_substitute ?(groups = 9) ~pat ~subst s =
+ protect
+ (fun () ->
+ let xsubst s =
+ let r = return_result 0 groups in
+ subst r s
+ in
+ Str.global_substitute pat xsubst s)
+;;
+
+let substitute_first ?(groups = 9) ~pat ~subst s =
+ protect
+ (fun () ->
+ let xsubst s =
+ let r = return_result 0 groups in
+ subst r s
+ in
+ Str.substitute_first pat xsubst s)
+;;
+
+(* replace_matched: n/a *)
+
+(* The splitting functions call the Str function of the same name inside
+ * 'protect'; the labelled arguments are passed on in the positional
+ * order Str expects.
+ *)
+
+let split ~sep s =
+ protect
+ (fun () ->
+ Str.split sep s)
+;;
+
+let bounded_split ~sep s ~max =
+ protect
+ (fun () ->
+ Str.bounded_split sep s max)
+;;
+
+let split_delim ~sep s =
+ protect
+ (fun () ->
+ Str.split_delim sep s)
+;;
+
+let bounded_split_delim ~sep s ~max =
+ protect
+ (fun () ->
+ Str.bounded_split_delim sep s max)
+;;
+
+let full_split ~sep s =
+ protect
+ (fun () ->
+ Str.full_split sep s)
+;;
+
+let bounded_full_split ~sep s ~max =
+ protect
+ (fun () ->
+ Str.bounded_full_split sep s max)
+;;
+
+(* These four are plain aliases of the Str functions; they are not
+ * wrapped in 'protect'.
+ *)
+let string_before = Str.string_before;;
+let string_after = Str.string_after;;
+let first_chars = Str.first_chars;;
+let last_chars = Str.last_chars;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.1 2000/06/25 20:48:19 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_str.mli b/helm/DEVEL/pxp/netstring/netstring_str.mli
new file mode 100644
index 000000000..86d684099
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstring_str.mli
@@ -0,0 +1,82 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module is a version of Str with a thread-safe interface *)
+
+type regexp = Str.regexp;;
+type split_result = Str.split_result = Text of string | Delim of string;;
+
+type result;;
+ (* The type of matching results *)
+
+val regexp: string -> regexp
+val regexp_case_fold: string -> regexp
+val quote: string -> string
+val regexp_string: string -> regexp
+val regexp_string_case_fold: string -> regexp
+
+val string_match:
+ ?groups:int -> pat:regexp -> string -> pos:int -> result option
+val search_forward:
+ ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
+val search_backward:
+ ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
+val string_partial_match:
+ ?groups:int -> pat:regexp -> string -> pos:int -> result option
+
+(* The ~groups option specifies how many groups will be stored into
+ * 'result'. Default: 9
+ *)
+
+(* Accessors for a result value. The string argument should be the
+ * string the match was performed on. Groups are numbered from 1, and
+ * only as many groups as requested with ~groups are stored. All six
+ * functions raise Not_found if the requested offset is not available.
+ *)
+val matched_string : result -> string -> string
+val match_beginning : result -> int
+val match_end : result -> int
+val matched_group : result -> int -> string -> string
+val group_beginning : result -> int -> int
+val group_end : result -> int -> int
+
+val global_replace: pat:regexp -> templ:string -> string -> string
+val replace_first: pat:regexp -> templ:string -> string -> string
+val global_substitute:
+ ?groups:int ->
+ pat:regexp -> subst:(result -> string -> string) -> string -> string
+val substitute_first:
+ ?groups:int ->
+ pat:regexp -> subst:(result -> string -> string) -> string -> string
+
+(* replace_matched: not available *)
+
+val split: sep:regexp -> string -> string list
+val bounded_split: sep:regexp -> string -> max:int -> string list
+val split_delim: sep:regexp -> string -> string list
+val bounded_split_delim: sep:regexp -> string -> max:int -> string list
+val full_split: sep:regexp -> string -> split_result list
+val bounded_full_split: sep:regexp -> string -> max:int -> split_result list
+
+val string_before: string -> int -> string
+val string_after: string -> int -> string
+val first_chars: string -> len:int -> string
+val last_chars: string -> len:int -> string
+
+(* Private: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.1 2000/06/25 20:48:19 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_top.ml b/helm/DEVEL/pxp/netstring/netstring_top.ml
new file mode 100644
index 000000000..d25505cfe
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstring_top.ml
@@ -0,0 +1,34 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* exec s:
+ * Parses s as a toplevel phrase and executes it in the running toploop;
+ * messages go to Format.err_formatter. Fails with Assert_failure if the
+ * toploop reports that the phrase could not be executed.
+ *
+ * The phrase is executed outside of the assertion: an 'assert' is
+ * removed entirely when the file is compiled with -noassert, which
+ * would silently skip the execution, too.
+ *)
+let exec s =
+  let l = Lexing.from_string s in
+  let ph = !Toploop.parse_toplevel_phrase l in
+  let ok = Toploop.execute_phrase false Format.err_formatter ph in
+  assert ok
+;;
+
+(* Install the printers: *)
+
+exec "#install_printer Neturl.print_url;;";;
+exec "#install_printer Netbuffer.print_buffer;;";;
+exec "#install_printer Netstream.print_stream;;";;
+exec "#install_printer Cgi.print_argument;;";;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.1 2000/06/24 20:20:58 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_top.mli b/helm/DEVEL/pxp/netstring/netstring_top.mli
new file mode 100644
index 000000000..1d5ac72eb
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netstring_top.mli
@@ -0,0 +1,21 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* You may load this module into the toploop in order to install
+ * the printers for the various opaque data types of Netstring.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/06/25 22:53:45 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/neturl.ml b/helm/DEVEL/pxp/netstring/neturl.ml
new file mode 100644
index 000000000..f597b0c1d
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/neturl.ml
@@ -0,0 +1,1302 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+exception Malformed_URL
+
+type url_syntax_option =
+ Url_part_not_recognized
+ | Url_part_allowed
+ | Url_part_required
+
+
+type url_syntax =
+ { url_enable_scheme : url_syntax_option;
+ url_enable_user : url_syntax_option;
+ url_enable_password : url_syntax_option;
+ url_enable_host : url_syntax_option;
+ url_enable_port : url_syntax_option;
+ url_enable_path : url_syntax_option;
+ url_enable_param : url_syntax_option;
+ url_enable_query : url_syntax_option;
+ url_enable_fragment : url_syntax_option;
+ url_enable_other : url_syntax_option;
+ url_accepts_8bits : bool;
+ url_is_valid : url -> bool;
+ }
+
+and url =
+ {
+ url_syntax : url_syntax;
+ mutable url_validity : bool;
+ url_scheme : string option;
+ url_user : string option;
+ url_password : string option;
+ url_host : string option;
+ url_port : int option;
+ url_path : string list;
+ url_param : string list;
+ url_query : string option;
+ url_fragment : string option;
+ url_other : string option;
+ }
+;;
+
+
+type char_category =
+ Accepted
+ | Rejected
+ | Separator
+
+
+
+let scan_url_part s k_from k_to cats accept_8bits =
+  (* Scans the longest run of accepted characters of 's', beginning at
+   * position 'k_from' and never going beyond position 'k_to'.
+   * Result: the position following the last character of the run. This
+   * is either the position of a separator character, or 'k_to'.
+   * A rejected character inside the run raises Malformed_URL, unless
+   * 'accept_8bits' is set and the character code is >= 128; such
+   * characters are then skipped like accepted ones.
+   * An accepted '%' must be followed, before 'k_to', by two hexadecimal
+   * digits which must be accepted characters, too; otherwise
+   * Malformed_URL is raised.
+   * 'cats': the category of every character code (0 to 255).
+   *)
+  assert (Array.length cats = 256);
+  assert (k_from >= 0);
+  assert (k_from <= k_to);
+  assert (k_to <= String.length s);
+
+  let category ch = cats.(Char.code ch) in
+
+  let require_hex_digit ch =
+    if category ch <> Accepted then raise Malformed_URL;
+    match ch with
+      '0'..'9' | 'A'..'F' | 'a'..'f' -> ()
+    | _ -> raise Malformed_URL
+  in
+
+  let rec advance pos =
+    if pos >= k_to then
+      pos
+    else begin
+      let ch = s.[pos] in
+      match category ch with
+        Separator ->
+          pos
+      | Rejected ->
+          if accept_8bits && ch >= '\128'
+          then advance (pos+1)
+          else raise Malformed_URL
+      | Accepted ->
+          if ch <> '%' then
+            advance (pos+1)
+          else begin
+            (* Both hex digits must lie before 'k_to': *)
+            if pos+2 >= k_to then raise Malformed_URL;
+            require_hex_digit s.[pos+1];
+            require_hex_digit s.[pos+2];
+            advance (pos+3)
+          end
+    end
+  in
+
+  advance k_from
+;;
+
+
+(* Create a categorization: *)
+
+let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
+ 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]
+
+let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
+ 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]
+
+let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]
+
+let safe = [ '$'; '-'; '_'; '.'; '+' ]
+
+let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]
+
+let make_cats accepted separators =
+  (* Builds a categorization table with 256 entries:
+   * - Every character of 'separators' becomes a separator.
+   * - Every character of 'accepted' that is not also listed in
+   *   'separators' becomes an accepted character.
+   * - The remaining characters are rejected.
+   *)
+  let table = Array.make 256 Rejected in
+  let mark category chars =
+    List.iter (fun ch -> table.(Char.code ch) <- category) chars
+  in
+  (* Separators are marked last, so they override 'accepted': *)
+  mark Accepted accepted;
+  mark Separator separators;
+  table
+;;
+
+
+(* Letters, digits, '+', '-' and '.' are accepted; ':' is the separator
+ * that ends the scheme name. Digits are included because RFC 1738
+ * allows them in scheme names.
+ *)
+let scheme_cats =
+  make_cats (lalpha @ ualpha @ digit @ ['+'; '-'; '.']) [':'] ;;
+
+ (* scheme_cats: character categorization to _extract_ the URL scheme *)
+
+
+let login_cats =
+ make_cats
+ (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])
+ [':'; '@'; '/'; '#' ]
+;;
+
+ (* login_cats: character categorization to _extract_ user name, password,
+ * host name, and port.
+ *)
+
+let host_cats =
+ make_cats
+ (lalpha @ ualpha @ digit @ ['.'; '-'])
+ []
+;;
+
+ (* host_cats: character categorization to _check_ whether the host name
+ * is formed only by legal characters.
+ * Especially '%' is not allowed here!
+ *)
+
+let port_cats =
+ make_cats
+ digit
+ []
+;;
+
+ (* port_cats: character categorization to _check_ whether the port number
+ * is formed only by legal characters.
+ * Especially '%' is not allowed here!
+ *)
+
+let path_cats separators =
+ make_cats
+ (lalpha @ ualpha @ digit @ safe @ extra @
+ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
+ separators
+;;
+
+
+(* separators_from_syntax:
+ * Returns the characters that begin the param (';'), query ('?') and
+ * fragment ('#') clauses, restricted to the clauses that the syntax
+ * 'syn' recognizes (i.e. allows or requires).
+ *)
+let separators_from_syntax syn =
+  let sep_if_recognized syn_option c =
+    match syn_option with
+      Url_part_not_recognized -> []
+    | Url_part_allowed
+    | Url_part_required -> [ c ]
+  in
+  List.concat
+    [ sep_if_recognized syn.url_enable_param ';';
+      sep_if_recognized syn.url_enable_query '?';
+      sep_if_recognized syn.url_enable_fragment '#';
+    ]
+;;
+
+
+let path_cats_from_syntax syn extraseps =
+ let separators = separators_from_syntax syn in
+ path_cats (separators @ extraseps)
+;;
+
+(* path_cats_from_syntax:
+ * Computes a character categorization to extract the path from a URL.
+ * This depends on the syntax because the list of possible separators
+ * contains the characters that may begin the next URL clause.
+ *
+ * Notes:
+ * - The '#' is rejected unless fragments are enabled.
+ * - The '~' is accepted although this violates RFC 1738.
+ *)
+
+
+let other_cats_from_syntax syn =
+  (* The clause separators recognized by 'syn' are computed by the shared
+   * helper. Here they are added to the _accepted_ characters; this
+   * categorization has no separator characters at all. In effect, '#'
+   * is accepted only if fragments are recognized (';' and '?' are
+   * accepted anyway).
+   *)
+  let separators = separators_from_syntax syn in
+
+  make_cats
+    (lalpha @ ualpha @ digit @ safe @ extra @
+     (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
+    []
+;;
+
+ (* other_cats: character categorization to extract or check the
+ * "other" part of the URL.
+ *)
+
+
+
+(* extract_url_scheme:
+ * Returns the scheme name at the beginning of 's', converted to lowercase
+ * and without the ':'.
+ * Raises Malformed_URL if 's' does not begin with a scheme name followed
+ * by ':'.
+ *)
+let extract_url_scheme s =
+  let len = String.length s in
+  let colon_pos = scan_url_part s 0 len scheme_cats false in
+  (* or raise Malformed_URL *)
+  if colon_pos = len then
+    (* The whole string was scanned without finding the ':' *)
+    raise Malformed_URL
+  else begin
+    assert (s.[colon_pos] = ':');
+    String.lowercase (String.sub s 0 colon_pos)
+  end
+;;
+
+
+(* Note: both operands are ordinary function arguments, so they are
+ * always evaluated.
+ *)
+let ( => ) a b = not a || b;;           (* implication *)
+
+let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *)
+
+(* url_syntax_is_valid:
+ * Checks whether the combination of recognized parts makes sense:
+ * - a password requires that the user part is recognized
+ * - a port requires that the host part is recognized
+ * - a user requires that the host part is recognized
+ * - the "other" part excludes user, password, host, port, and path
+ *)
+let url_syntax_is_valid syn =
+  let recognized x = x <> Url_part_not_recognized in
+  (recognized syn.url_enable_password => recognized syn.url_enable_user) &&
+  (recognized syn.url_enable_port     => recognized syn.url_enable_host) &&
+  (recognized syn.url_enable_user     => recognized syn.url_enable_host) &&
+  not ( (recognized syn.url_enable_user ||
+         recognized syn.url_enable_password ||
+         recognized syn.url_enable_host ||
+         recognized syn.url_enable_port ||
+         recognized syn.url_enable_path) &&
+        (recognized syn.url_enable_other))
+;;
+
+
+(* partial_url_syntax:
+ * Returns a copy of 'syn' in which every required part is only allowed.
+ * Parts that are not recognized remain not recognized; the components
+ * 'url_accepts_8bits' and 'url_is_valid' are taken over unchanged.
+ *)
+let partial_url_syntax syn =
+  let weaken =
+    function
+        Url_part_required -> Url_part_allowed
+      | (Url_part_not_recognized | Url_part_allowed) as opt -> opt
+  in
+  { syn with
+      url_enable_scheme   = weaken syn.url_enable_scheme;
+      url_enable_user     = weaken syn.url_enable_user;
+      url_enable_password = weaken syn.url_enable_password;
+      url_enable_host     = weaken syn.url_enable_host;
+      url_enable_port     = weaken syn.url_enable_port;
+      url_enable_path     = weaken syn.url_enable_path;
+      url_enable_param    = weaken syn.url_enable_param;
+      url_enable_query    = weaken syn.url_enable_query;
+      url_enable_fragment = weaken syn.url_enable_fragment;
+      url_enable_other    = weaken syn.url_enable_other;
+  }
+;;
+
+
+
+let file_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_not_recognized;
+ url_enable_password = Url_part_not_recognized;
+ url_enable_host = Url_part_allowed;
+ url_enable_port = Url_part_not_recognized;
+ url_enable_path = Url_part_required;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let ftp_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_allowed;
+ url_enable_password = Url_part_allowed;
+ url_enable_host = Url_part_required;
+ url_enable_port = Url_part_allowed;
+ url_enable_path = Url_part_allowed;
+ url_enable_param = Url_part_allowed;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let http_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_allowed;
+ url_enable_password = Url_part_allowed;
+ url_enable_host = Url_part_required;
+ url_enable_port = Url_part_allowed;
+ url_enable_path = Url_part_allowed;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_allowed;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let mailto_url_syntax =
+ { url_enable_scheme = Url_part_required;
+ url_enable_user = Url_part_not_recognized;
+ url_enable_password = Url_part_not_recognized;
+ url_enable_host = Url_part_not_recognized;
+ url_enable_port = Url_part_not_recognized;
+ url_enable_path = Url_part_not_recognized;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_required;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let null_url_syntax =
+ { url_enable_scheme = Url_part_not_recognized;
+ url_enable_user = Url_part_not_recognized;
+ url_enable_password = Url_part_not_recognized;
+ url_enable_host = Url_part_not_recognized;
+ url_enable_port = Url_part_not_recognized;
+ url_enable_path = Url_part_not_recognized;
+ url_enable_param = Url_part_not_recognized;
+ url_enable_query = Url_part_not_recognized;
+ url_enable_fragment = Url_part_not_recognized;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+let ip_url_syntax =
+ { url_enable_scheme = Url_part_allowed;
+ url_enable_user = Url_part_allowed;
+ url_enable_password = Url_part_allowed;
+ url_enable_host = Url_part_allowed;
+ url_enable_port = Url_part_allowed;
+ url_enable_path = Url_part_allowed;
+ url_enable_param = Url_part_allowed;
+ url_enable_query = Url_part_allowed;
+ url_enable_fragment = Url_part_allowed;
+ url_enable_other = Url_part_not_recognized;
+ url_accepts_8bits = false;
+ url_is_valid = (fun _ -> true);
+ }
+;;
+
+
+(* common_url_syntax:
+ * A hash table mapping the scheme names "file", "ftp", "http", and
+ * "mailto" to their syntax descriptions.
+ *)
+let common_url_syntax =
+  let table = Hashtbl.create 10 in
+  List.iter
+    (fun (scheme, syntax) -> Hashtbl.add table scheme syntax)
+    [ "file",   file_url_syntax;
+      "ftp",    ftp_url_syntax;
+      "http",   http_url_syntax;
+      "mailto", mailto_url_syntax;
+    ];
+  table
+;;
+
+
+(* url_conforms_to_syntax:
+ * Checks whether the parts present in 'url' match its own syntax
+ * description (url.url_syntax):
+ * - every part that is present must be recognized by the syntax
+ * - every part that the syntax requires must be present
+ * - the URL is already flagged as valid, or the 'url_is_valid' callback
+ *   of the syntax accepts it
+ * The list-valued parts (path, param) count as present if non-empty.
+ *)
+let url_conforms_to_syntax url =
+  let recognized x = x <> Url_part_not_recognized in
+  let required x = x = Url_part_required in
+  let present x    = x <> None in
+  let syn = url.url_syntax in
+  (present url.url_scheme   => recognized syn.url_enable_scheme)   &&
+  (present url.url_user     => recognized syn.url_enable_user)     &&
+  (present url.url_password => recognized syn.url_enable_password) &&
+  (present url.url_host     => recognized syn.url_enable_host)     &&
+  (present url.url_port     => recognized syn.url_enable_port)     &&
+  ((url.url_path <> [])     => recognized syn.url_enable_path)     &&
+  ((url.url_param <> [])    => recognized syn.url_enable_param)    &&
+  (present url.url_query    => recognized syn.url_enable_query)    &&
+  (present url.url_fragment => recognized syn.url_enable_fragment) &&
+  (present url.url_other    => recognized syn.url_enable_other)    &&
+  (required syn.url_enable_scheme   => present url.url_scheme)     &&
+  (required syn.url_enable_user     => present url.url_user)       &&
+  (required syn.url_enable_password => present url.url_password)   &&
+  (required syn.url_enable_host     => present url.url_host)       &&
+  (required syn.url_enable_port     => present url.url_port)       &&
+  (required syn.url_enable_path     => (url.url_path <> []))       &&
+  (required syn.url_enable_param    => (url.url_param <> []))      &&
+  (required syn.url_enable_query    => present url.url_query)      &&
+  (required syn.url_enable_fragment => present url.url_fragment)   &&
+  (required syn.url_enable_other    => present url.url_other)      &&
+  (url.url_validity || syn.url_is_valid url)
+;;
+
+
+let url_syntax_of_url url = url.url_syntax
+;;
+
+
+let modify_url
+ ?syntax
+ ?(encoded = false)
+ ?scheme
+ ?user
+ ?password
+ ?host
+ ?port
+ ?path
+ ?param
+ ?query
+ ?fragment
+ ?other
+ url
+ =
+
+ let encode = Netencoding.Url.encode in
+ let enc x =
+ if encoded then
+ x
+ else
+ match x with
+ None -> None
+ | Some x' -> Some (encode x')
+ in
+ let enc_list l =
+ if encoded then
+ l
+ else
+ List.map encode l
+ in
+
+ let new_syntax =
+ match syntax with
+ None -> url.url_syntax
+ | Some syn -> syn
+ in
+
+ let check_string s_opt cats =
+ match s_opt with
+ None -> ()
+ | Some s ->
+ let l = String.length s in
+ let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ if k <> l then raise Malformed_URL
+ in
+
+ let check_string_list p cats sep =
+ List.iter
+ (fun p_component ->
+ let l = String.length p_component in
+ let k =
+ scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ if k <> l then raise Malformed_URL;
+ if String.contains p_component sep then raise Malformed_URL;
+ )
+ p
+ in
+
+ (* Create the modified record: *)
+ let url' =
+ {
+ url_syntax = new_syntax;
+ url_validity = false;
+ url_scheme = if scheme = None then url.url_scheme else scheme;
+ url_user = if user = None then url.url_user else enc user;
+ url_password = if password = None then url.url_password else enc password;
+ url_host = if host = None then url.url_host else host;
+ url_port = if port = None then url.url_port else port;
+ url_path = (match path with
+ None -> url.url_path
+ | Some p -> enc_list p);
+ url_param = (match param with
+ None -> url.url_param
+ | Some p -> enc_list p);
+ url_query = if query = None then url.url_query else enc query;
+ url_fragment = if fragment = None then url.url_fragment else enc fragment;
+ url_other = if other = None then url.url_other else enc other;
+ }
+ in
+ (* Check whether the URL conforms to the syntax:
+ *)
+ if not (url_conforms_to_syntax url') then raise Malformed_URL;
+ if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
+ if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
+ if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
+ (* Check every part: *)
+ check_string url'.url_scheme scheme_cats;
+ check_string url'.url_user login_cats;
+ check_string url'.url_password login_cats;
+ check_string url'.url_host host_cats;
+ (match url'.url_port with
+ None -> ()
+ | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
+ );
+ let path_cats = path_cats_from_syntax new_syntax [] in
+ let other_cats = other_cats_from_syntax new_syntax in
+ check_string url'.url_query path_cats;
+ check_string url'.url_fragment path_cats;
+ check_string url'.url_other other_cats;
+ (* Check the lists: *)
+ check_string_list url'.url_param path_cats ';';
+ check_string_list url'.url_path path_cats '/';
+ (* Further path checks: *)
+ begin match url'.url_path with
+ [] ->
+ (* The path is empty: There must not be a 'param' or 'query' *)
+ if url'.url_host <> None then begin
+ if url'.url_param <> [] then raise Malformed_URL;
+ if url'.url_query <> None then raise Malformed_URL;
+ end
+ | ["";""] ->
+ (* This is illegal. *)
+ raise Malformed_URL;
+ | "" :: p' ->
+ (* The path is absolute: always ok *)
+ ()
+ | _ ->
+ (* The path is relative: there must not be a host *)
+ if url'.url_host <> None then raise Malformed_URL;
+ end;
+ begin match url'.url_path with
+ _ :: rest -> (* "//" ambiguity *)
+ begin match List.rev rest with
+ _ :: rest' ->
+ if List.exists (fun p -> p = "") rest' then
+ raise Malformed_URL;
+ | [] ->
+ ()
+ end
+ | [] ->
+ ()
+ end;
+ (* Cache that the URL is valid: *)
+ url'.url_validity <- true;
+
+ url'
+;;
+
+
+let null_url =
+ {
+ url_syntax = null_url_syntax;
+ url_validity = true;
+ url_scheme = None;
+ url_user = None;
+ url_password = None;
+ url_host = None;
+ url_port = None;
+ url_path = [];
+ url_param = [];
+ url_query = None;
+ url_fragment = None;
+ url_other = None;
+ }
+;;
+
+
+(* make_url:
+ * Creates a URL from the passed components by modifying 'null_url'
+ * (which has no components at all) with the syntax 'url_syntax'.
+ * Raises Invalid_argument "Neturl.make_url" if 'url_syntax' does not
+ * pass 'url_syntax_is_valid'. The components themselves are checked by
+ * 'modify_url'.
+ *)
+let make_url
+    ?(encoded = false)
+    ?scheme
+    ?user
+    ?password
+    ?host
+    ?port
+    ?path
+    ?param
+    ?query
+    ?fragment
+    ?other
+    url_syntax
+  =
+
+  if url_syntax_is_valid url_syntax then
+    modify_url
+      ~encoded
+      ~syntax:url_syntax
+      ?scheme
+      ?user
+      ?password
+      ?host
+      ?port
+      ?path
+      ?param
+      ?query
+      ?fragment
+      ?other
+      null_url
+  else
+    invalid_arg "Neturl.make_url"
+;;
+
+
+(* remove_from_url:
+ * Builds a new URL from the components of 'url', leaving out every
+ * component whose flag is 'true'. The stored components are passed to
+ * 'make_url' as already encoded strings, together with the syntax of
+ * 'url'.
+ *)
+let remove_from_url
+    ?(scheme = false)
+    ?(user = false)
+    ?(password = false)
+    ?(host = false)
+    ?(port = false)
+    ?(path = false)
+    ?(param = false)
+    ?(query = false)
+    ?(fragment = false)
+    ?(other = false)
+    url
+  =
+
+  (* 'unless drop c': the component 'c', or None if it is to be dropped *)
+  let unless drop component =
+    if drop then None else component
+  in
+
+  make_url
+    ~encoded:   true
+    ?scheme:    (unless scheme   url.url_scheme)
+    ?user:      (unless user     url.url_user)
+    ?password:  (unless password url.url_password)
+    ?host:      (unless host     url.url_host)
+    ?port:      (unless port     url.url_port)
+    ?path:      (unless path     (Some url.url_path))
+    ?param:     (unless param    (Some url.url_param))
+    ?query:     (unless query    url.url_query)
+    ?fragment:  (unless fragment url.url_fragment)
+    ?other:     (unless other    url.url_other)
+    url.url_syntax
+;;
+
+
+(* default_url:
+ * Builds a new URL from 'url' in which every missing component is taken
+ * from the corresponding argument (if passed). Components that 'url'
+ * already has are kept. A path or param list is missing if it is empty.
+ * Unless 'encoded' is set, the passed user, password, path, param,
+ * query, fragment, and other strings are URL-encoded first; scheme,
+ * host, and port are never encoded.
+ *)
+let default_url
+    ?(encoded = false)
+    ?scheme
+    ?user
+    ?password
+    ?host
+    ?port
+    ?(path = [])
+    ?(param = [])
+    ?query
+    ?fragment
+    ?other
+    url
+  =
+
+  let encode = Netencoding.Url.encode in
+
+  let enc =
+    function
+        Some raw when not encoded -> Some (encode raw)
+      | unchanged -> unchanged
+  in
+
+  let enc_list components =
+    if encoded then components else List.map encode components
+  in
+
+  (* The current component wins; the default only fills a gap: *)
+  let fill_in current default =
+    match current with
+      Some _ -> current
+    | None -> default
+  in
+
+  let fill_in_list current default =
+    if current = [] then enc_list default else current
+  in
+
+  make_url
+    ~encoded:  true
+    ?scheme:   (fill_in url.url_scheme   scheme)
+    ?user:     (fill_in url.url_user     (enc user))
+    ?password: (fill_in url.url_password (enc password))
+    ?host:     (fill_in url.url_host     host)
+    ?port:     (fill_in url.url_port     port)
+    ~path:     (fill_in_list url.url_path  path)
+    ~param:    (fill_in_list url.url_param param)
+    ?query:    (fill_in url.url_query    (enc query))
+    ?fragment: (fill_in url.url_fragment (enc fragment))
+    ?other:    (fill_in url.url_other    (enc other))
+    url.url_syntax
+;;
+
+
+(* undefault_url:
+ * Builds a new URL from 'url' in which every component that is equal to
+ * the corresponding argument is removed. Components for which no
+ * argument is passed, or whose value differs from the argument, are
+ * kept. The arguments are compared with the stored (encoded) values
+ * as they are.
+ *)
+let undefault_url
+    ?scheme
+    ?user
+    ?password
+    ?host
+    ?port
+    ?path
+    ?param
+    ?query
+    ?fragment
+    ?other
+    url
+  =
+
+  let drop_if_default current default =
+    match current, default with
+      Some cur, Some def when cur = def -> None
+    | _ -> current
+  in
+
+  let drop_list_if_default current default =
+    match default with
+      Some def when def = current -> []
+    | _ -> current
+  in
+
+  make_url
+    ~encoded:  true
+    ?scheme:   (drop_if_default url.url_scheme   scheme)
+    ?user:     (drop_if_default url.url_user     user)
+    ?password: (drop_if_default url.url_password password)
+    ?host:     (drop_if_default url.url_host     host)
+    ?port:     (drop_if_default url.url_port     port)
+    ~path:     (drop_list_if_default url.url_path  path)
+    ~param:    (drop_list_if_default url.url_param param)
+    ?query:    (drop_if_default url.url_query    query)
+    ?fragment: (drop_if_default url.url_fragment fragment)
+    ?other:    (drop_if_default url.url_other    other)
+    url.url_syntax
+;;
+
+
+(* url_provides:
+ * Returns 'true' iff every component whose flag is 'true' is present in
+ * 'url'. The list-valued components (path, param) count as present if
+ * they are non-empty.
+ *)
+let url_provides
+    ?(scheme = false)
+    ?(user = false)
+    ?(password = false)
+    ?(host = false)
+    ?(port = false)
+    ?(path = false)
+    ?(param = false)
+    ?(query = false)
+    ?(fragment = false)
+    ?(other = false)
+    url
+  =
+
+  (scheme   => (url.url_scheme   <> None)) &&
+  (user     => (url.url_user     <> None)) &&
+  (password => (url.url_password <> None)) &&
+  (host     => (url.url_host     <> None)) &&
+  (port     => (url.url_port     <> None)) &&
+  (path     => (url.url_path     <> []))   &&
+  (param    => (url.url_param    <> []))   &&
+  (query    => (url.url_query    <> None)) &&
+  (fragment => (url.url_fragment <> None)) &&
+  (other    => (url.url_other    <> None))
+;;
+
+
+(* return_if: unwraps an optional value; raises Not_found for None. *)
+let return_if =
+  function
+      Some x -> x
+    | None -> raise Not_found
+;;
+
+
+(* decode_if:
+ * Returns the stored (encoded) string of an optional component, either
+ * as it is ('want_encoded') or after URL-decoding it.
+ * Raises Not_found if the component is missing.
+ *)
+let decode_if want_encoded value =
+  let stored = return_if value in
+  if not want_encoded then
+    Netencoding.Url.decode stored    (* WARNING: not thread-safe! *)
+  else
+    stored
+;;
+
+
+(* decode_path_if:
+ * Like decode_if, but for the list-valued components: every list
+ * element is URL-decoded unless 'want_encoded' is set.
+ *)
+let decode_path_if want_encoded value =
+  if not want_encoded then
+    List.map Netencoding.Url.decode value   (* WARNING: not thread-safe! *)
+  else
+    value
+;;
+
+
+let url_scheme url = return_if url.url_scheme;;
+let url_user ?(encoded=false) url = decode_if encoded url.url_user;;
+let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
+let url_host url = return_if url.url_host;;
+let url_port url = return_if url.url_port;;
+let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;;
+let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;;
+let url_query ?(encoded=false) url = decode_if encoded url.url_query;;
+let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
+let url_other ?(encoded=false) url = decode_if encoded url.url_other;;
+
+
+(* string_of_url:
+ * Concatenates the stored components to the string
+ *   scheme://user:password@host:port/path other;params?query#fragment
+ * (without the blank) where missing components are left out. The login
+ * part (user, password, port) is only printed if there is a host.
+ * Fails if the URL is not flagged as valid.
+ *)
+let string_of_url url =
+  if not (url.url_validity) then
+    failwith "Neturl.string_of_url: URL not flagged as valid";
+
+  (* 'prefixed p c': the component 'c' preceded by 'p', or "" if missing *)
+  let prefixed prefix =
+    function
+        None -> ""
+      | Some v -> prefix ^ v
+  in
+
+  let scheme_part =
+    match url.url_scheme with
+      None -> ""
+    | Some s -> s ^ ":"
+  in
+
+  let login_part =
+    match url.url_host with
+      None -> ""
+    | Some host ->
+        let user_part =
+          match url.url_user with
+            None -> ""
+          | Some user -> user ^ prefixed ":" url.url_password ^ "@"
+        in
+        let port_part =
+          match url.url_port with
+            None -> ""
+          | Some port -> ":" ^ string_of_int port
+        in
+        "//" ^ user_part ^ host ^ port_part
+  in
+
+  let path_prefix =
+    match url.url_path with
+      [""] ->
+        (* The root path: the concatenation below yields only "" *)
+        "/"
+    | first :: _ when url.url_scheme = None &&
+                      url.url_host = None &&
+                      String.contains first ':'
+      ->
+        (* Really a special case: The colon contained in 'first' may cause
+         * that a prefix of 'first' is interpreted as URL scheme. In this
+         * case, "./" is prepended (as recommended in RFC 1808, 5.3).
+         *)
+        "./"
+    | _ ->
+        ""
+  in
+
+  String.concat ""
+    [ scheme_part;
+      login_part;
+      path_prefix;
+      String.concat "/" url.url_path;
+      prefixed "" url.url_other;
+      String.concat "" (List.map (fun s -> ";" ^ s) url.url_param);
+      prefixed "?" url.url_query;
+      prefixed "#" url.url_fragment;
+    ]
+;;
+
+
+let url_of_string url_syntax s =
+ let l = String.length s in
+ let recognized x = x <> Url_part_not_recognized in
+
+  let rec collect_words terminators eof_char cats k =
+    (* Collect words as recognized by 'cats', starting at position 'k' in
+     * 's'. Collection stops if one of the characters listed in
+     * 'terminators' is found. If the end of the string is reached, it is
+     * treated as 'eof_char' (which the caller must list in 'terminators').
+     * Returns the list of pairs (word, following separator), and the
+     * position of the terminating separator (or 'l' at the end of the
+     * string).
+     *)
+    let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in
+    (* or raise Malformed_URL *)
+    let word, sep =
+      String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
+    if List.mem sep terminators then
+      [word, sep], k'
+    else
+      (* 'sep' only separates two words: continue after it *)
+      let word_sep_list', k'' =
+        collect_words terminators eof_char cats (k'+1) in
+      ((word, sep) :: word_sep_list'), k''
+  in
+
+  (* Try to extract the scheme name. If there is none, scanning restarts
+   * at the beginning of the string:
+   *)
+  let scheme, k1 =
+    if recognized url_syntax.url_enable_scheme then
+      try
+        let k = scan_url_part s 0 l scheme_cats false in
+        (* or raise Malformed_URL *)
+        if k = l then raise Malformed_URL;
+        assert (s.[k] = ':');
+        Some (String.sub s 0 k), (k+1)
+      with
+        Malformed_URL -> None, 0
+    else
+      None, 0
+  in
+
+ (* If there is a "//", a host will follow: *)
+ let host, port, user, password, k2 =
+ if recognized url_syntax.url_enable_host &&
+ k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin
+
+ let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2)
+ in
+ (* or raise Malformed_URL *)
+
+ let int x =
+ try int_of_string x with _ -> raise Malformed_URL in
+
+ match word_sep_list with
+ [ host, ('/'|'#') ] ->
+ Some host, None, None, None, k'
+ | [ host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), None, None, k'
+ | [ user, '@'; host, ('/'|'#') ] ->
+ Some host, None, Some user, None, k'
+ | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), Some user, None, k'
+ | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
+ Some host, None, Some user, Some password, k'
+ | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), Some user, Some password, k'
+ | _ ->
+ raise Malformed_URL
+ end
+ else
+ None, None, None, None, k1
+ in
+
+ let path, k3 =
+ if recognized url_syntax.url_enable_path &&
+ k2 < l (* && s.[k2]='/' *)
+ then begin
+ let cats = path_cats_from_syntax url_syntax [ '/' ] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
+ (* or raise Malformed_URL *)
+ match word_sep_list with
+ [ "", '/'; "", _ ] ->
+ [ "" ], k'
+ | [ "", _ ] ->
+ [], k'
+ | _ ->
+ List.map fst word_sep_list, k'
+ end
+ else begin
+ (* If there is a single '/': skip it *)
+ if not (recognized url_syntax.url_enable_other) &&
+ k2 < l && s.[k2]='/'
+ then
+ [], (k2+1)
+ else
+ [], k2
+ end
+ in
+
+ let other, k4 =
+ if recognized url_syntax.url_enable_other &&
+ k3 < l
+ then begin
+
+ let cats = other_cats_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ other, _ ] -> Some other, k'
+ | _ -> assert false
+ end
+ else
+ None, k3
+ in
+
+ let param, k5 =
+ if recognized url_syntax.url_enable_param &&
+ k4 < l && s.[k4]=';'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+ let seps' = List.filter (fun c -> c <> ';') seps in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
+ (* or raise Malformed_URL *)
+
+ List.map fst word_sep_list, k'
+ end
+ else
+ [], k4
+ in
+
+ let query, k6 =
+ if recognized url_syntax.url_enable_query &&
+ k5 < l && s.[k5]='?'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ query, _ ] -> Some query, k'
+ | _ -> assert false
+ end
+ else
+ None, k5
+ in
+
+ let fragment, k7 =
+ if recognized url_syntax.url_enable_fragment &&
+ k6 < l && s.[k6]='#'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ fragment, _ ] -> Some fragment, k'
+ | _ -> assert false
+ end
+ else
+ None, k6
+ in
+
+ if k7 <> l then raise Malformed_URL;
+
+ make_url
+ ~encoded:true
+ ?scheme:scheme
+ ?user:user
+ ?password:password
+ ?host:host
+ ?port:port
+ ~path:path
+ ~param:param
+ ?query:query
+ ?fragment:fragment
+ ?other:other
+ url_syntax
+;;
+
+
+(* split_path:
+ * Splits the string 's' at every '/' into the list of path components.
+ * The empty string yields the empty list, and "/" yields [ "" ]. A
+ * leading or trailing '/' otherwise shows up as an empty first or last
+ * component.
+ *)
+let split_path s =
+  let len = String.length s in
+  (* 'gather start rev_words': the words found so far are accumulated in
+   * reverse order; the next word begins at 'start'.
+   *)
+  let rec gather start rev_words =
+    let stop =
+      try String.index_from s start '/' with Not_found -> len
+    in
+    let rev_words' = String.sub s start (stop - start) :: rev_words in
+    if stop >= len then
+      List.rev rev_words'
+    else
+      gather (stop + 1) rev_words'
+  in
+  match gather 0 [] with
+    [ "" ] -> []
+  | [ ""; "" ] -> [ "" ]
+  | components -> components
+;;
+
+
+(* join_path:
+ * Concatenates the path components, separated by '/'. The list [ "" ]
+ * denotes the root path and yields "/".
+ *)
+let join_path =
+  function
+      [ "" ] -> "/"
+    | components -> String.concat "/" components;;
+
+
+(* norm_path:
+ * Normalizes the path 'l' (a list of components) in three passes:
+ * (1) empty components in the middle of the path are removed ("//")
+ * (2) the components "." are removed
+ * (3) every pair (x, "..") is removed, where x is neither "" nor "..";
+ *     this is repeated until no such pair is left
+ * In all passes, the flag 'at_start' is true only for the first
+ * component of the path.
+ *)
+let norm_path l =
+
+  let rec squeeze_slashes at_start =
+    function
+        [] ->
+          []
+      | [ "" ] ->
+          [ "" ]
+      | [ ""; "" ] when at_start ->
+          [ "" ]
+      | "" :: tail when not at_start ->
+          squeeze_slashes false tail
+      | head :: tail ->
+          head :: squeeze_slashes false tail
+  in
+
+  let rec drop_dots at_start =
+    function
+        [] ->
+          []
+      | ([ "." ] | [ "."; "" ]) ->
+          if at_start then [] else [ "" ]
+      | "." :: (_ :: _ as tail) ->
+          drop_dots false tail
+      | head :: tail ->
+          head :: drop_dots false tail
+  in
+
+  (* Removes the first pair (x, ".."), or raises Not_found: *)
+  let rec cancel_one_dot_dot at_start =
+    function
+        [] ->
+          raise Not_found
+      | seg :: ".." :: tail when seg <> "" && seg <> ".." ->
+          (* A pair at the very end leaves a trailing slash, except if
+           * the pair is the whole path:
+           *)
+          if tail = [] && not at_start then [ "" ] else tail
+      | head :: tail ->
+          head :: cancel_one_dot_dot false tail
+  in
+
+  let rec cancel_dot_dots path =
+    let step =
+      try Some (cancel_one_dot_dot true path) with Not_found -> None
+    in
+    match step with
+      Some shorter -> cancel_dot_dots shorter
+    | None -> path
+  in
+
+  match cancel_dot_dots (drop_dots true (squeeze_slashes true l)) with
+    [ ".." ] -> [ ".."; "" ]
+  | [ ""; "" ] -> [ "" ]
+  | normalized -> normalized
+;;
+
+
+(* apply_relative_url:
+ * Interprets 'relurl' relative to 'baseurl'. The result always has the
+ * syntax of 'baseurl'. The more components 'relurl' is missing, the more
+ * is inherited from 'baseurl':
+ * - 'relurl' has a scheme: nothing else is inherited
+ * - 'relurl' has a host: the scheme is inherited
+ * - 'relurl' has an absolute path: scheme and login info are inherited
+ * - 'relurl' has an empty path: additionally the path is inherited, and
+ *   params, query, fragment as far as 'relurl' does not define them
+ * - 'relurl' has a relative path: scheme and login info are inherited;
+ *   the last component of the base path is replaced by the relative
+ *   path, and the result is normalized by 'norm_path'
+ * Fails if one of the URLs is not flagged as valid. 'modify_url' may
+ * raise Malformed_URL if the combined URL is not well-formed.
+ *)
+let apply_relative_url baseurl relurl =
+  if not (baseurl.url_validity) || not (relurl.url_validity) then
+    failwith "Neturl.apply_relative_url: URL not flagged as valid";
+
+  if relurl.url_scheme <> None then
+    modify_url
+      ~syntax:baseurl.url_syntax           (* inherit syntax *)
+      relurl
+  else
+    if relurl.url_host <> None then
+      modify_url
+        ~syntax:baseurl.url_syntax         (* inherit syntax and scheme *)
+        ?scheme:baseurl.url_scheme
+        relurl
+    else
+      match relurl.url_path with
+        "" :: _ ->
+          (* An absolute path *)
+          modify_url
+            ~syntax:baseurl.url_syntax     (* inherit syntax, scheme, and *)
+            ~encoded:true
+            ?scheme:baseurl.url_scheme     (* login info *)
+            ?host:baseurl.url_host
+            ?port:baseurl.url_port
+            ?user:baseurl.url_user
+            ?password:baseurl.url_password
+            relurl
+      | [] ->
+          (* Empty: Inherit also path, params, query, and fragment *)
+          let new_params, new_query, new_fragment =
+            match relurl.url_param, relurl.url_query, relurl.url_fragment
+            with
+              [], None, None ->
+                (* Inherit all three *)
+                baseurl.url_param, baseurl.url_query, baseurl.url_fragment
+            | [], None, f ->
+                (* Inherit params and query *)
+                baseurl.url_param, baseurl.url_query, f
+            | [], q, f ->
+                (* Inherit params *)
+                baseurl.url_param, q, f
+            | p, q, f ->
+                (* Inherit none of them *)
+                p, q, f
+          in
+          modify_url
+            ~syntax:baseurl.url_syntax
+            ~encoded:true
+            ?scheme:baseurl.url_scheme
+            ?host:baseurl.url_host
+            ?port:baseurl.url_port
+            ?user:baseurl.url_user
+            ?password:baseurl.url_password
+            ~path:baseurl.url_path
+            ~param:new_params
+            ?query:new_query
+            ?fragment:new_fragment
+            relurl
+      | relpath ->
+          (* A relative path: it replaces the last component of the base
+           * path (or the whole base path if that is empty).
+           *)
+          let rec change_path basepath =
+            match basepath with
+            | [] ->
+                relpath
+            | [ _ ] ->
+                relpath
+            | x :: basepath' ->
+                x :: change_path basepath'
+          in
+          let new_path = norm_path (change_path baseurl.url_path) in
+          modify_url
+            ~syntax:baseurl.url_syntax     (* inherit syntax, scheme, and *)
+            ~encoded:true
+            ?scheme:baseurl.url_scheme     (* login info *)
+            ?host:baseurl.url_host
+            ?port:baseurl.url_port
+            ?user:baseurl.url_user
+            ?password:baseurl.url_password
+            ~path:new_path                 (* and change path *)
+            relurl
+
+;;
+
+
+(* print_url:
+ * Printer for the toploop: prints 'url' in the notation <URL:...> using
+ * the Format module. Fails (by 'string_of_url') if the URL is not
+ * flagged as valid.
+ *)
+let print_url url =
+  Format.print_string ("<URL:" ^ string_of_url url ^ ">")
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/07/04 21:50:51 gerd
+ * Fixed typo.
+ *
+ * Revision 1.3 2000/06/26 22:57:49 gerd
+ * Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2 2000/06/25 19:39:48 gerd
+ * Lots of Bugfixes.
+ *
+ * Revision 1.1 2000/06/24 20:19:59 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/neturl.mli b/helm/DEVEL/pxp/netstring/neturl.mli
new file mode 100644
index 000000000..988aef6c8
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/neturl.mli
@@ -0,0 +1,460 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module already uses O'Caml-3 features. *)
+
+(* Uniform Resource Locators (URLs):
+ *
+ * This module provides functions to parse URLs, to print URLs, to
+ * store URLs, to modify URLs, and to apply relative URLs.
+ *
+ * URLs are strings formed according to pattern (1) or (2):
+ *
+ * (1) scheme://user:password@host:port/path;params?query#fragment
+ * (2) scheme:other;params?query#fragment
+ *
+ * The word at the beginning of the URL identifies the URL scheme
+ * (such as "http" or "file"). Depending on the scheme, not all of the
+ * parts are allowed, or parts may be omitted. This module defines the
+ * type 'url_syntax' whose values describe which parts are allowed/required/
+ * not allowed for a concrete URL scheme (see below).
+ *
+ * Not all characters are allowed in a URL. Some characters are allowed,
+ * but have the special task to separate the various parts of the URL
+ * (reserved characters).
+ * However, it is possible to include even invalid or reserved characters
+ * as normal content by applying the '%'-encoding on these characters:
+ * A '%' indicates that an encoded character follows, and the character
+ * is denoted by a two-digit hexadecimal number (e.g. %2f for '/').
+ * In the following descriptions, the term "encoded string" means a string
+ * containing such %-encoded characters, and the "decoded string" means a
+ * string not containing such characters.
+ * See the module Netencoding.Url for functions encoding or decoding
+ * strings.
+ *
+ * The type 'url' describes values storing the components of a URL,
+ * and the 'url_syntax' for the URL. In general, the components are
+ * stored as encoded strings; however, not for all components the
+ * '%'-encoding is applicable.
+ * For convenience, the functions creating, modifying, and accessing
+ * URLs can handle both encoded and decoded strings. In order to
+ * avoid errors, the functions pass strings in their decoded form by default.
+ *
+ * Note that there is currently no function to compare URLs. The
+ * canonical comparison ( = ) is not applicable because the same URL
+ * may be written differently.
+ *
+ * Note that nothing is said about the character set/encoding of URLs.
+ * Some protocols and standards prefer UTF-8 as fundamental encoding
+ * and apply the '%'-encoding on top of it; i.e. the byte sequence
+ * representing a character in UTF-8 is '%'-encoded. There is no special
+ * support for this technique.
+ *
+ * For more information about URLs, see RFCs 1738 and 1808.
+ *)
+
+exception Malformed_URL
+(* Is raised by a number of functions when encountering a badly formed
+ * URL.
+ *)
+
+val extract_url_scheme : string -> string
+ (* Returns the URL scheme from the string representation of an URL.
+ * E.g. extract_url_scheme "http://host/path" = "http".
+ * The scheme name is always converted to lowercase characters.
+ * Raises Malformed_URL if the scheme name is not found.
+ *)
+
+type url_syntax_option =
+ Url_part_not_recognized
+ | Url_part_allowed
+ | Url_part_required
+
+
+type url_syntax =
+ { url_enable_scheme : url_syntax_option;
+ url_enable_user : url_syntax_option;
+ url_enable_password : url_syntax_option;
+ url_enable_host : url_syntax_option;
+ url_enable_port : url_syntax_option;
+ url_enable_path : url_syntax_option;
+ url_enable_param : url_syntax_option;
+ url_enable_query : url_syntax_option;
+ url_enable_fragment : url_syntax_option;
+ url_enable_other : url_syntax_option;
+ url_accepts_8bits : bool;
+ url_is_valid : url -> bool;
+ }
+
+and url
+;;
+
+(* Values of type 'url_syntax' describe which components of an URL are
+ * recognized, which are allowed (and optional), and which are required.
+ * Not all combinations are valid; the predicate expressed by the
+ * function 'url_syntax_is_valid' must hold.
+ * The function 'url_is_valid' is applied when a fresh URL is created
+ * and must return 'true'. This function makes it possible to add an
+ * arbitrary validity criterion to 'url_syntax'. (Note that the URL passed to
+ * this function is not fully working; you can safely assume that the
+ * accessor functions url_scheme etc. can be applied to it.)
+ *
+ * Switch 'url_accepts_8bits': If 'true', the bytes with code 128 to
+ * 255 are treated like alphanumeric characters; if 'false' these bytes
+ * are illegal (but it is still possible to include such bytes in their
+ * encoded form: %80 to %FF).
+ *
+ * Values of type 'url' describe concrete URLs. Every URL must have
+ * a fundamental 'url_syntax', and it is only possible to create URLs
+ * conforming to the syntax. See 'make_url' for further information.
+ *)
+
+
+val url_syntax_is_valid : url_syntax -> bool
+ (* Checks whether the passed url_syntax is valid. This means:
+ *
+ * - If passwords are recognized, users (and hosts) must be recognized, too
+ * - If ports are recognized, hosts must be recognized, too
+ * - If users are recognized, hosts must be recognized, too
+ * - Either the syntax recognizes one of the phrases
+ * { user, password, host, port, path }, or the syntax recognizes
+ * the phrase 'other'.
+ *)
+
+
+val partial_url_syntax : url_syntax -> url_syntax
+ (* Transforms the syntax into another syntax where all required parts are
+ * changed into optional parts.
+ *)
+
+
+(* Note that all following url_syntaxes do not allow 8bit bytes. *)
+
+val null_url_syntax : url_syntax
+
+val ip_url_syntax : url_syntax
+ (* Maximum syntax for IP based protocols *)
+
+val common_url_syntax : (string, url_syntax) Hashtbl.t
+ (* Syntax descriptions for common URL schemes:
+ *
+ * null_url_syntax: nothing is recognized
+ *
+ * common_url_syntax: Hashtable mapping from URL scheme names to
+ * definitions of syntaxes:
+ *
+ * "file": scheme, host?, path
+ * "ftp": scheme, user?, password?, host, port?, path?, param?
+ * "http": scheme, user?, password?, host, port?, path?, query?
+ * "mailto": scheme, other
+ *
+ * Notes:
+ * (1) These syntax descriptions can be weakened for partial/relative URLs
+ * by changing the required parts to optional parts: See the function
+ * 'partial_url_syntax'.
+ * (2) None of the descriptions allows fragments. These can be enabled by
+ * setting 'url_enable_fragment' to Url_part_allowed. E.g.
+ * { file_url_syntax with url_enable_fragment = Url_part_allowed }
+ *)
+
+val null_url : url
+ (* A URL without any component and 'null_url_syntax'
+ *)
+
+val make_url :
+ ?encoded:bool ->
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url_syntax ->
+ url
+ (* Creates a URL from components:
+ *
+ * - The components "scheme" and "host" are simple strings to which the
+ * '%'-encoding is not applicable.
+ * - The component "port" is a simple number. Of course, the '%'-encoding
+ * is not applicable either.
+ * - The components "user", "password", "query", "fragment", and "other"
+ * are strings which may contain '%'-encoded characters. By default,
+ * you can pass any string for these components, and problematic characters
+ * are automatically encoded. If you set ~encoded:true, the passed
+ * strings must already be encoded, but the function checks whether
+ * the encoding is correct.
+ * Note that for "query" even the characters '?' and '=' are encoded
+ * by default, so you need to set ~encoded:true to pass a reasonable
+ * query string.
+ * - The components "path" and "param" are lists of strings which may
+ * contain '%'-encoded characters. Again, the default is to pass
+ * decoded strings to the function, and the function encodes them
+ * automatically, and by setting ~encoded:true the caller is responsible
+ * for encoding the strings.
+ * path = [] and params = [] mean that no path and no parameters are
+ * specified, respectively.
+ * See below for the representation of these components.
+ *
+ * Except for "path", the strings representing the components do not
+ * contain the characters separating the components from each other.
+ * The "path" component includes the '/' at the beginning of the path
+ * (if present).
+ *
+ * The created URL must conform to the 'url_syntax', i.e.
+ * - The URL must only contain components which are recognized by the
+ * syntax
+ * - The URL must contain components which are required by the syntax
+ * - The URL must fulfill the predicate expressed by the 'url_is_valid'
+ * function of the syntax.
+ *
+ * The path of a URL is represented as a list of '/'-separated path
+ * components. i.e.
+ * [ s1; s2; ...; sN ] represents the path
+ * s1 ^ "/" ^ s2 ^ "/" ^ ... ^ "/" ^ sN
+ * As special cases:
+ * [] is the non-existing path
+ * [ "" ] is "/"
+ * [ "";"" ] is illegal
+ *
+ * Except for s1 and sN, the path components must not be empty strings.
+ *
+ * To avoid ambiguities, it is illegal to create URLs with both relative
+ * paths (s1 <> "") and host components.
+ *
+ * Parameters of URLs are components beginning with ';'. The list
+ * of parameters is represented as list of strings where the strings
+ * contain the value following ';'.
+ *)
+
+val modify_url :
+ ?syntax:url_syntax ->
+ ?encoded:bool ->
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url ->
+ url
+ (* Modifies the passed components and returns the modified URL.
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val remove_from_url :
+ ?scheme:bool ->
+ ?user:bool ->
+ ?password:bool ->
+ ?host:bool ->
+ ?port:bool ->
+ ?path:bool ->
+ ?param:bool ->
+ ?query:bool ->
+ ?fragment:bool ->
+ ?other:bool ->
+ url ->
+ url
+ (* Removes the 'true' components from the URL, and returns the modified
+ * URL.
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val default_url :
+ ?encoded:bool ->
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url ->
+ url
+ (* Adds missing components and returns the modified URL.
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val undefault_url :
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url ->
+ url
+ (* Removes components from the URL if they have the passed value, and
+ * returns the modified URL.
+ * Note: The values must always be passed in _encoded_ form!
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val url_syntax_of_url : url -> url_syntax
+ (* Returns the 'url_syntax' record of a URL. *)
+
+val url_of_string : url_syntax -> string -> url
+ (* Parses the passed string according to the passed url_syntax. *)
+
+val string_of_url : url -> string
+ (* Returns the URL as string *)
+
+val url_provides :
+ ?scheme:bool ->
+ ?user:bool ->
+ ?password:bool ->
+ ?host:bool ->
+ ?port:bool ->
+ ?path:bool ->
+ ?param:bool ->
+ ?query:bool ->
+ ?fragment:bool ->
+ ?other:bool ->
+ url ->
+ bool
+ (* Returns 'true' iff the URL has all of the components passed with
+ * 'true' value.
+ *)
+
+val url_scheme : url -> string
+val url_user : ?encoded:bool -> url -> string
+val url_password : ?encoded:bool -> url -> string
+val url_host : url -> string
+val url_port : url -> int
+val url_path : ?encoded:bool -> url -> string list
+val url_param : ?encoded:bool -> url -> string list
+val url_query : ?encoded:bool -> url -> string
+val url_fragment : ?encoded:bool -> url -> string
+val url_other : ?encoded:bool -> url -> string
+ (* Return components of the URL. The functions return decoded strings
+ * unless ~encoded:true is set.
+ * If the component does not exist, the exception Not_found
+ * is raised.
+ *)
+
+val split_path : string -> string list
+ (* Splits a '/'-separated path into components (e.g. to set up the
+ * ~path argument of make_url).
+ * E.g. split_path "a/b/c" = [ "a"; "b"; "c" ],
+ * split_path "/a/b" = [ ""; "a"; "b" ],
+ * split_path "a/b/" = [ "a"; "b"; "" ]
+ *)
+
+val join_path : string list -> string
+ (* Concatenates the path components (reverse function of split_path).
+ *)
+
+val norm_path : string list -> string list
+ (* Removes "." and ".." from the path if possible. Deletes double slashes.
+ *
+ * EXAMPLES:
+ *
+ * norm_path ["."] = []
+ * means: "." = ""
+ * norm_path ["."; ""] = []
+ * means: "./" = ""
+ * norm_path ["a"; "."] = ["a"; ""]
+ * means: "a/." = "a/"
+ * norm_path ["a"; "b"; "."] = ["a"; "b"; ""]
+ * means: "a/b/." = "a/b/"
+ * norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]
+ * means: "a/./b/." = "a/b/"
+ * norm_path [".."] = [".."; ""]
+ * means: ".." = "../"
+ * norm_path [".."; ""] = [".."; ""]
+ * means: "../" = "../"
+ * norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]
+ * means: "a/b/../c" = "a/c"
+ * norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]
+ * means: "a/b/../c/" = "a/c/"
+ * norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]
+ * means: "//a//b" = "/a/b"
+ * norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]
+ * means: "a/b//../c/" = "a/c/"
+ * norm_path ["a"; ".."] = []
+ * means: "a/.." = ""
+ *)
+
+
+val apply_relative_url : url -> url -> url
+ (* apply_relative_url base rel:
+ * Interprets 'rel' relative to 'base' and returns the new URL. This
+ * function implements RFC 1808.
+ *)
+
+val print_url : url -> unit
+ (* Printer for the toploop. *)
+
+(* ---------------------------------------------------------------------- *)
+
+(* EXAMPLES:
+ *
+ * let http = Hashtbl.find common_url_syntax "http";;
+ * let u = url_of_string http "http://g:pw@host/a/%62/";;
+ * string_of_url u;;
+ * --> "http://g:pw@host/a/%62/"
+ * url_scheme u;;
+ * --> "http"
+ * url_user u;;
+ * --> "g"
+ * url_password u;;
+ * --> "pw"
+ * url_host u;;
+ * --> "host"
+ * url_path u;;
+ * --> [ ""; "a"; "b"; "" ] (* sic! *)
+ * url_path ~encoded:true u;;
+ * --> [ ""; "a"; "%62"; "" ]
+ * let v = make_url
+ * ~path:[ ".."; "c" ]
+ * ~fragment:"near-the-#-character"
+ * { (partial_url_syntax http) with url_enable_fragment = Url_part_allowed };;
+ * string_of_url v;;
+ * --> "../c#near-the-%23-character"
+ * let u' = modify_url ~syntax:(url_syntax_of_url v) u;;
+ * (* u does not permit fragments *)
+ * let w = apply_relative_url u' v;;
+ * string_of_url w;;
+ * --> "http://g:pw@host/a/c#near-the-%23-character"
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/26 22:57:49 gerd
+ * Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2 2000/06/25 22:55:47 gerd
+ * Doc update.
+ *
+ * Revision 1.1 2000/06/24 20:19:59 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/tests/.cvsignore b/helm/DEVEL/pxp/netstring/tests/.cvsignore
new file mode 100644
index 000000000..c1fcbc4ae
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/.cvsignore
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/tests/Makefile b/helm/DEVEL/pxp/netstring/tests/Makefile
new file mode 100644
index 000000000..1aa5265ba
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/Makefile
@@ -0,0 +1,26 @@
+# Note: you need an appropriate toploop "ocamlfattop" to run the
+# tests.
+
+# 2nd note: "test_encoding.cgi" is a CGI script; you must invoke
+# it through browser and WWW server.
+
+test: test_recode
+ ocamlfattop test_netencoding.ml
+ ocamlfattop test_mimestring.ml
+ ocamlfattop test_cgi.ml
+ ocamlfattop test_neturl.ml
+ ./test_recode
+
+test_recode: test_recode.ml
+ ocamlc -custom -o test_recode unix.cma threads.cma str.cma \
+ ../netstring.cma ../netmappings_iso.cmo \
+ -I .. -thread test_recode.ml
+
+distclean: clean
+ rm -f *~ test_recode
+
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+CLEAN:
+
diff --git a/helm/DEVEL/pxp/netstring/tests/test_cgi.ml b/helm/DEVEL/pxp/netstring/tests/test_cgi.ml
new file mode 100644
index 000000000..43d9886ed
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/test_cgi.ml
@@ -0,0 +1,423 @@
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+
+open Cgi;;
+
+(**********************************************************************)
+(* dest_form_encoded_parameters *)
+(**********************************************************************)
+
+let t001 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t002 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t003 f =
+ let r =
+ f
+ "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t004 f =
+ let r =
+ f
+ "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+
+--snip--"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text\013\n"]
+;;
+
+
+let t005 f =
+ let r =
+ f
+ "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+
+--snip--"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text\n"]
+;;
+
+
+let t006 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data;name= \"blupp\"
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t007 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data;name= \"name=blupp\"
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["name=blupp", "text/plain", "This is a text"]
+;;
+
+
+let t008 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; strange=\"name=blop\"; name= \"blupp\"
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t009 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; strange=\" name=blop \"; name=blupp
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t010 f =
+ (* There is a space after "octet-stream"! *)
+ let r =
+ f
+ "--snip
+Content-Disposition: form-data; name=blupp
+Content-type: application/octet-stream
+
+This is a text
+--snip--"
+ "snip"
+ in
+ r = ["blupp", "application/octet-stream", "This is a text"]
+;;
+
+
+let t011 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip
+Content-Disposition: form-data; name=blipp
+
+Another line
+--snip-- blah
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text";
+ "blipp", "text/plain", "Another line" ]
+;;
+
+
+let t012 f =
+ (* A real example *)
+ let r =
+ f
+"-----------------------------10843891265508332411092264958
+Content-Disposition: form-data; name=\"line\"
+
+aaa
+-----------------------------10843891265508332411092264958
+Content-Disposition: form-data; name=\"submit\"
+
+Submit
+-----------------------------10843891265508332411092264958--
+"
+ "---------------------------10843891265508332411092264958"
+ in
+ r = [ "line", "text/plain", "aaa";
+ "submit", "text/plain", "Submit";
+ ]
+;;
+
+
+(**********************************************************************)
+(* encode/decode *)
+(**********************************************************************)
+
+let t100() =
+ let s = String.create 256 in
+ for i = 0 to 255 do s.[i] <- Char.chr i done;
+ let r = encode s in
+ r = ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+ "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+ "+!%22%23$%25%26'()*%2B,-.%2F" ^
+ "0123456789%3A%3B%3C%3D%3E%3F" ^
+ "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+ "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+ "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+ "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+ "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+ "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+ "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+ "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+ "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+ "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF")
+;;
+
+
+let t101() =
+ let r = String.create 256 in
+ for i = 0 to 255 do r.[i] <- Char.chr i done;
+ let s = decode
+ ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+ "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+ "+!%22%23$%25%26'()*%2B,-.%2F" ^
+ "0123456789%3A%3B%3C%3D%3E%3F" ^
+ "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+ "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+ "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+ "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+ "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+ "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+ "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+ "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+ "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+ "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF") in
+ r = s
+;;
+
+
+let t102() =
+ let r = String.create 256 in
+ for i = 0 to 255 do r.[i] <- Char.chr i done;
+ let s = decode
+ ((String.lowercase
+ ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+ "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+ "+!%22%23$%25%26'()*%2B,-.%2F" ^
+ "0123456789%3A%3B%3C%3D%3E%3F")) ^
+ "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+ (String.lowercase
+ ("%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+ "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+ "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+ "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+ "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+ "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+ "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+ "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+ "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"))) in
+ r = s
+;;
+
+(**********************************************************************)
+(* dest_url_encoded_parameters *)
+(**********************************************************************)
+
+let t200() =
+ let r = dest_url_encoded_parameters "a=b&c=d" in
+ r = ["a", "b"; "c", "d" ]
+;;
+
+
+let t201() =
+ let r = dest_url_encoded_parameters "a=&c=d" in
+ r = ["a", ""; "c", "d" ]
+;;
+
+
+let t202() =
+ let r = dest_url_encoded_parameters "a=&c=" in
+ r = ["a", ""; "c", "" ]
+;;
+
+
+let t203() =
+ let r = dest_url_encoded_parameters "" in
+ r = []
+;;
+
+
+let t204() =
+ let r = dest_url_encoded_parameters "%41=%42" in
+ r = ["A", "B"]
+;;
+
+
+(**********************************************************************)
+
+let test f n =
+ if f() then
+ print_endline ("Test " ^ n ^ " ok")
+ else
+ print_endline ("Test " ^ n ^ " FAILED!!!!");
+ flush stdout
+;;
+
+
+let test_dest_form_encoded_parameters f n =
+ let dest s b =
+ let args = dest_form_encoded_parameters s b default_config in
+ List.map
+ (fun a -> arg_name a, arg_mimetype a, arg_value a)
+ args
+ in
+ if f dest then
+ print_endline ("Test dest_form_encoded_parameters " ^ n ^ " ok")
+ else
+ print_endline ("Test dest_form_encoded_parameters " ^ n ^ " FAILED!!!!");
+ flush stdout
+;;
+
+
+let fill_stream s =
+ (* Returns a channel that reads from string s.
+ * This requires forking.
+ *)
+ let rd, wr = Unix.pipe() in
+ let pid = Unix.fork() in
+ if pid = 0 then begin
+ Unix.close rd;
+ let out = Unix.out_channel_of_descr wr in
+ output_string out s;
+ close_out out;
+ exit(0);
+ end;
+ Unix.close wr;
+ Unix.in_channel_of_descr rd
+;;
+
+
+let test_dest_form_encoded_parameters_from_netstream f n =
+ let dest s b =
+ let fd = fill_stream s in
+ let bs = String.length b * 2 in
+ let stream = Netstream.create_from_channel fd None bs in
+ let args = dest_form_encoded_parameters_from_netstream
+ stream b default_config in
+
+(*
+ List.iter
+ (fun a ->
+ Printf.printf "name=%s mimetype=%s value=%s\n"
+ (arg_name a) (arg_mimetype a) (arg_value a))
+ args;
+*)
+ List.map
+ (fun a -> arg_name a, arg_mimetype a, arg_value a)
+ args
+ in
+ if f dest then
+ Printf.printf
+ "Test dest_form_encoded_parameters_from_netstream %s ok\n"
+ n
+ else
+ print_endline ("Test dest_form_encoded_parameters_from_netstream " ^ n ^ " FAILED!!!!");
+ flush stdout
+;;
+
+
+
+test_dest_form_encoded_parameters t001 "001";;
+test_dest_form_encoded_parameters t002 "002";;
+test_dest_form_encoded_parameters t003 "003";;
+test_dest_form_encoded_parameters t004 "004";;
+test_dest_form_encoded_parameters t005 "005";;
+test_dest_form_encoded_parameters t006 "006";;
+test_dest_form_encoded_parameters t007 "007";;
+test_dest_form_encoded_parameters t008 "008";;
+test_dest_form_encoded_parameters t009 "009";;
+test_dest_form_encoded_parameters t010 "010";;
+test_dest_form_encoded_parameters t011 "011";;
+test_dest_form_encoded_parameters t012 "012";;
+
+test_dest_form_encoded_parameters_from_netstream t001 "001";;
+test_dest_form_encoded_parameters_from_netstream t002 "002";;
+test_dest_form_encoded_parameters_from_netstream t003 "003";;
+test_dest_form_encoded_parameters_from_netstream t004 "004";;
+test_dest_form_encoded_parameters_from_netstream t005 "005";;
+test_dest_form_encoded_parameters_from_netstream t006 "006";;
+test_dest_form_encoded_parameters_from_netstream t007 "007";;
+test_dest_form_encoded_parameters_from_netstream t008 "008";;
+test_dest_form_encoded_parameters_from_netstream t009 "009";;
+test_dest_form_encoded_parameters_from_netstream t010 "010";;
+test_dest_form_encoded_parameters_from_netstream t011 "011";;
+test_dest_form_encoded_parameters_from_netstream t012 "012";;
+
+test t100 "100";;
+test t101 "101";;
+test t102 "102";;
+
+test t200 "200";;
+test t201 "201";;
+test t202 "202";;
+test t203 "203";;
+test t204 "204";;
diff --git a/helm/DEVEL/pxp/netstring/tests/test_encoding.cgi b/helm/DEVEL/pxp/netstring/tests/test_encoding.cgi
new file mode 100755
index 000000000..2402cdadf
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/test_encoding.cgi
@@ -0,0 +1,72 @@
+#! /bin/sh
+# (*
+exec /opt/ocaml-2.04/bin/ocamlfattop "$0" "$@"
+*) directory ".";;
+
+#directory "..";;
+#load "netstring.cma";;
+
+Cgi.header "";
+Cgi.parse_arguments
+ { Cgi.default_config with
+ Cgi.how_to_process_arguments = (fun _ -> Cgi.File)
+ };
+let params = Cgi.arguments() in
+print_string "\n";
+print_string "Parameters: \n";
+print_string "\n";
+List.iter
+ (fun (n,a) ->
+ print_string "";
+ print_string n;
+ print_string ":";
+ print_string (Cgi.arg_mimetype a);
+ print_string "=";
+ (match Cgi.arg_filename a with
+ None -> ()
+ | Some fn -> print_string ("[filename=" ^ fn ^ "]")
+ );
+ print_string (Cgi.arg_value a);
+ print_string " \n";
+
+ )
+ params;
+
+Cgi.cleanup();
+
+print_string " \n";
+
+print_string "GET URL-encoded form \n";
+print_string "\n";
+
+print_string "POST URL-encoded form \n";
+print_string "\n";
+
+print_string "POST FORM-encoded form \n";
+print_string "\n";
+
+print_string "File upload \n";
+print_string "\n";
+
+
+
+print_string "\n";
+
+flush stdout
+;;
+
+
diff --git a/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml b/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml
new file mode 100644
index 000000000..db5eac930
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml
@@ -0,0 +1,589 @@
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+open Mimestring;;
+
+(**********************************************************************)
+(* scan_structured_value *)
+(**********************************************************************)
+
+(* '@' and '.' are specials: the address is split into atoms around them. *)
+let t001() =
+  scan_structured_value "user@domain.com" [ '@'; '.' ] []
+  = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] ;;
+
+
+(* Blanks between the tokens yield nothing while ' ' is not a special. *)
+let t002() =
+  scan_structured_value "user @ domain . com" [ '@'; '.' ] []
+  = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] ;;
+
+
+(* Without Return_comments the parenthesized comment yields no token. *)
+let t003() =
+  scan_structured_value
+    "user(Do you know him?)@domain.com" [ '@'; '.' ] []
+  = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] ;;
+
+
+(* With ' ' declared special, every blank comes back as Special ' '. *)
+let t004() =
+  scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []
+  = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
+      Special ' '; Special '.'; Special ' '; Atom "com" ] ;;
+
+
+(* With '(' declared special the would-be comment is tokenized as atoms. *)
+let t005() =
+  scan_structured_value
+    "user(Do you know him?)@domain.com" [ '@'; '.'; '(' ] []
+  = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
+      Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] ;;
+
+
+(* A quoted string is one QString token; the dot inside is no Special. *)
+let t006() =
+  scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] []
+  = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
+      Atom "com" ] ;;
+
+
+(* Escaped quote, parentheses and specials inside a quoted string. *)
+let t007() =
+  scan_structured_value "\"\\\"()@. \"@domain.com" [ '@'; '.' ] []
+  = [ QString "\"()@. "; Special '@'; Atom "domain"; Special '.';
+      Atom "com" ] ;;
+
+
+(* A nested comment between two atoms yields no token at all. *)
+let t008() =
+  scan_structured_value "a(b(c(d)e)f)g" [] []
+  = [ Atom "a"; Atom "g" ] ;;
+
+
+(* An unterminated nested comment: only the atom before it is returned. *)
+let t009() =
+  scan_structured_value "a(b(c(d)e)f" [] []
+  = [ Atom "a" ] ;;
+
+
+(* Backslash-escaped parentheses neither open nor close a comment. *)
+let t010() =
+  scan_structured_value "a(b\\(c\\(d\\)e)f" [] []
+  = [ Atom "a"; Atom "f" ] ;;
+
+
+(* Unterminated comment that ends in a lone backslash. *)
+let t011() =
+  scan_structured_value "a(b(c(d)e)f\\" [] []
+  = [ Atom "a" ] ;;
+
+
+(* An unterminated quoted string is still returned as a QString. *)
+let t012() =
+  scan_structured_value "\"abc" [] []
+  = [ QString "abc" ] ;;
+
+
+(* A lone backslash ending an unterminated QString is kept in its text. *)
+let t013() =
+  scan_structured_value "\"abc\\" [] []
+  = [ QString "abc\\" ] ;;
+
+
+(* New tests for netstring-0.9: *)
+
+(* Return_comments makes the comment visible as a Comment token. *)
+let t020() =
+  scan_structured_value
+    "user(Do you know him?)@domain.com" [ '@'; '.' ] [ Return_comments ]
+  = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.';
+      Atom "com" ] ;;
+
+(* ' ' special, comments not returned: three Special ' ' precede '@'. *)
+let t021() =
+  scan_structured_value
+    "user (Do you know him?) @ domain . com" [ '@'; '.'; ' ' ] []
+  = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
+      Special ' '; Atom "domain";
+      Special ' '; Special '.'; Special ' '; Atom "com" ] ;;
+
+(* Same input with Return_comments: a Comment sits between two blanks. *)
+let t022() =
+  scan_structured_value
+    "user (Do you know him?) @ domain . com" [ '@'; '.'; ' ' ] [ Return_comments ]
+  = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@';
+      Special ' '; Atom "domain";
+      Special ' '; Special '.'; Special ' '; Atom "com" ] ;;
+
+(* Without Recognize_encoded_words an encoded word is scanned as an
+ * ordinary atom. *)
+let t023() =
+  scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" [] []
+  = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ] ;;
+
+(* With Recognize_encoded_words the word is returned in three parts. *)
+let t024() =
+  scan_structured_value
+    "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" [] [ Recognize_encoded_words ]
+  = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ] ;;
+
+(* Two B-encoded words separated by a blank: both come back as
+ * EncodedWord tokens and the blank yields no token of its own. *)
+let t025() =
+  let input =
+    "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
+  in
+  let expected =
+    [ EncodedWord("ISO-8859-1", "B", "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=");
+      EncodedWord("ISO-8859-2", "B", "dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==") ]
+  in
+  scan_structured_value input [] [ Recognize_encoded_words ] = expected ;;
+
+(**********************************************************************)
+(* s_extended_token *)
+(**********************************************************************)
+
+(* Build a scanner for [str] from the arguments and return its token list. *)
+let scan specials options str =
+  scan_token_list (create_mime_scanner specials options str) ;;
+
+(* Two atoms separated by one blank; position, line, column and length
+ * of both tokens are checked. *)
+let t100() =
+  let tokens = scan [] [] "Two atoms" in
+  match tokens with
+  | [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 3
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 4
+      && get_line a2 = 1
+      && get_column a2 = 4
+      && get_length a2 = 5
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+
+(* Atoms surrounded by blanks.  Two blanks precede each atom, which is
+ * what the asserted positions 2 and 7 require. *)
+let t101() =
+  let tokens = scan [] [] "  Two  atoms " in
+  match tokens with
+  | [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+      get_pos a1 = 2
+      && get_line a1 = 1
+      && get_column a1 = 2
+      && get_length a1 = 3
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 7
+      && get_line a2 = 1
+      && get_column a2 = 7
+      && get_length a2 = 5
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+
+(* Line/column tracking across a line break.  The input starts with two
+ * blanks, as the asserted position 2 of the first atom requires. *)
+let t102() =
+  let tokens = scan [] [] "  Two\n atoms " in
+  match tokens with
+  | [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+      get_pos a1 = 2
+      && get_line a1 = 1
+      && get_column a1 = 2
+      && get_length a1 = 3
+      && not (separates_adjacent_encoded_words a1)
+      (* second token: on line 2, right after one blank *)
+      && get_pos a2 = 7
+      && get_line a2 = 2
+      && get_column a2 = 1
+      && get_length a2 = 5
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Two quoted strings separated by one blank; the reported lengths
+ * (5 and 10) include the enclosing quotes. *)
+let t110() =
+  let tokens = scan [] [] "\"Two\" \"qstrings\"" in
+  match tokens with
+  | [ a1, QString "Two"; a2, QString "qstrings" ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 5
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 6
+      && get_line a2 = 1
+      && get_column a2 = 6
+      && get_length a2 = 10
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Quoted strings surrounded by blanks.  Two blanks precede each string,
+ * which is what the asserted positions 2 and 9 require. *)
+let t111() =
+  let tokens = scan [] [] "  \"Two\"  \"qstrings\" " in
+  match tokens with
+  | [ a1, QString "Two"; a2, QString "qstrings" ] ->
+      get_pos a1 = 2
+      && get_line a1 = 1
+      && get_column a1 = 2
+      && get_length a1 = 5
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 9
+      && get_line a2 = 1
+      && get_column a2 = 9
+      && get_length a2 = 10
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Quoted strings that contain a line break.  Two blanks precede each
+ * string, as the asserted positions 2 and 15 (column 8) require. *)
+let t112() =
+  let tokens = scan [] [] "  \"Two\nlines\"  \"and\nqstrings\" " in
+  match tokens with
+  | [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
+      get_pos a1 = 2
+      && get_line a1 = 1
+      && get_column a1 = 2
+      && get_length a1 = 11
+      && not (separates_adjacent_encoded_words a1)
+      (* second token: starts on line 2 *)
+      && get_pos a2 = 15
+      && get_line a2 = 2
+      && get_column a2 = 8
+      && get_length a2 = 14
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Quoted strings with a backslash-escaped line break.  Two blanks precede
+ * each string, as the asserted positions 2 and 16 (column 8) require. *)
+let t113() =
+  let tokens = scan [] [] "  \"Two\\\nlines\"  \"and\\\nqstrings\" " in
+  match tokens with
+  | [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
+      get_pos a1 = 2
+      && get_line a1 = 1
+      && get_column a1 = 2
+      && get_length a1 = 12
+      && not (separates_adjacent_encoded_words a1)
+      (* second token: starts on line 2 *)
+      && get_pos a2 = 16
+      && get_line a2 = 2
+      && get_column a2 = 8
+      && get_length a2 = 15
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Domain literals are implemented like quoted strings, so only the
+ * most complicated test case is exercised: a backslash-escaped line
+ * break inside each literal.  Two blanks precede each literal, as the
+ * asserted positions 2 and 16 (column 8) require.
+ *)
+let t120() =
+  let tokens = scan [] [] "  [Two\\\nlines]  [and\\\nliterals] " in
+  match tokens with
+  | [ a1, DomainLiteral "Two\nlines"; a2, DomainLiteral "and\nliterals" ] ->
+      get_pos a1 = 2
+      && get_line a1 = 1
+      && get_column a1 = 2
+      && get_length a1 = 12
+      && not (separates_adjacent_encoded_words a1)
+      (* second token: starts on line 2 *)
+      && get_pos a2 = 16
+      && get_line a2 = 2
+      && get_column a2 = 8
+      && get_length a2 = 15
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Two comments returned as tokens (Return_comments); the reported
+ * lengths include the parentheses. *)
+let t130() =
+  let tokens = scan [] [ Return_comments ] "(Two) (comments)" in
+  match tokens with
+  | [ a1, Comment; a2, Comment ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 5
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 6
+      && get_line a2 = 1
+      && get_column a2 = 6
+      && get_length a2 = 10
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Comments containing a line break: the second comment starts on
+ * line 2, column 7. *)
+let t131() =
+  let tokens = scan [] [ Return_comments ] "(Two\nlines) (and\ncomments)" in
+  match tokens with
+  | [ a1, Comment; a2, Comment ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 11
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 12
+      && get_line a2 = 2
+      && get_column a2 = 7
+      && get_length a2 = 14
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Comments containing a backslash-escaped line break; the backslash
+ * counts towards position and length. *)
+let t132() =
+  let tokens = scan [] [ Return_comments ] "(Two\\\nlines) (and\\\ncomments)" in
+  match tokens with
+  | [ a1, Comment; a2, Comment ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 12
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 13
+      && get_line a2 = 2
+      && get_column a2 = 7
+      && get_length a2 = 15
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* A nested comment spanning four lines, followed by an atom that is
+ * expected on line 4, column 6. *)
+let t133() =
+  let tokens = scan [] [ Return_comments ] "(a\n(b\nc)d\ne(f)) atom" in
+  match tokens with
+  | [ a1, Comment; a2, Atom "atom" ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 15
+      && not (separates_adjacent_encoded_words a1)
+      (* the atom after the comment *)
+      && get_pos a2 = 16
+      && get_line a2 = 4
+      && get_column a2 = 6
+      && get_length a2 = 4
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Two adjacent control characters (code 31) are returned as two
+ * Control tokens of length 1. *)
+let t140() =
+  let tokens = scan [] [] "\031\031" in
+  match tokens with
+  | [ a1, Control '\031'; a2, Control '\031' ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 1
+      && not (separates_adjacent_encoded_words a1)
+      (* second token *)
+      && get_pos a2 = 1
+      && get_line a2 = 1
+      && get_column a2 = 1
+      && get_length a2 = 1
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Tab and newline as specials.  Two blanks precede the second newline and
+ * the final tab, as its asserted position 8 and column 2 require. *)
+let t150() =
+  let tokens = scan [ '\t'; '\n' ] [] " \t\n  \n  \t" in
+  match tokens with
+  | [ a1, Special '\t'; _, Special '\n'; _, Special '\n'; a2, Special '\t'] ->
+      get_pos a1 = 1
+      && get_line a1 = 1
+      && get_column a1 = 1
+      && get_length a1 = 1
+      && not (separates_adjacent_encoded_words a1)
+      (* the final tab, on line 3 *)
+      && get_pos a2 = 8
+      && get_line a2 = 3
+      && get_column a2 = 2
+      && get_length a2 = 1
+      && not (separates_adjacent_encoded_words a2)
+  | _ ->
+      false
+;;
+
+(* Two Q-encoded words, no specials.  The decoded text is spelled with the
+ * escape \246 (=F6) so the check is independent of the file encoding. *)
+let t160() =
+  let tokens = scan [] [ Recognize_encoded_words ]
+                 "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
+  match tokens with
+  | [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
+      a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 22
+      && not (separates_adjacent_encoded_words a1)
+      && get_decoded_word a1 = "G\246rd"
+      && get_charset a1 = "ISO8859-1"
+      (* second encoded word *)
+      && get_pos a2 = 23
+      && get_line a2 = 1
+      && get_column a2 = 23
+      && get_length a2 = 22
+      && not (separates_adjacent_encoded_words a2)
+      && get_decoded_word a2 = "G\246rd"
+      && get_charset a2 = "ISO8859-1"
+  | _ ->
+      false
+;;
+
+(* Two Q-encoded words with ' ' as a special: the blank between them is
+ * returned and must be flagged as separating adjacent encoded words. *)
+let t161() =
+  let tokens = scan [ ' ' ] [ Recognize_encoded_words ]
+                 "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
+  match tokens with
+  | [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
+      sp, Special ' ';
+      a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 22
+      && not (separates_adjacent_encoded_words a1)
+      && get_decoded_word a1 = "G\246rd"
+      && get_charset a1 = "ISO8859-1"
+      (* second encoded word; \246 is the byte that =F6 decodes to *)
+      && get_pos a2 = 23
+      && get_line a2 = 1
+      && get_column a2 = 23
+      && get_length a2 = 22
+      && not (separates_adjacent_encoded_words a2)
+      && get_decoded_word a2 = "G\246rd"
+      && get_charset a2 = "ISO8859-1"
+      (* the blank in between *)
+      && separates_adjacent_encoded_words sp
+  | _ ->
+      false
+;;
+
+(* Two Q-encoded words separated by TWO blanks (the asserted position 24
+ * and the two Special tokens require that); both blanks must be flagged. *)
+let t162() =
+  let tokens = scan [ ' ' ] [ Recognize_encoded_words ]
+                 "=?iso8859-1?q?G=F6rd?=  =?iso8859-1?q?G=F6rd?=" in
+  match tokens with
+  | [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
+      sp1, Special ' ';
+      sp2, Special ' ';
+      a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+      get_pos a1 = 0
+      && get_line a1 = 1
+      && get_column a1 = 0
+      && get_length a1 = 22
+      && not (separates_adjacent_encoded_words a1)
+      && get_decoded_word a1 = "G\246rd"
+      && get_charset a1 = "ISO8859-1"
+      (* second encoded word; \246 is the byte that =F6 decodes to *)
+      && get_pos a2 = 24
+      && get_line a2 = 1
+      && get_column a2 = 24
+      && get_length a2 = 22
+      && not (separates_adjacent_encoded_words a2)
+      && get_decoded_word a2 = "G\246rd"
+      && get_charset a2 = "ISO8859-1"
+      (* the blanks in between *)
+      && separates_adjacent_encoded_words sp1
+      && separates_adjacent_encoded_words sp2
+  | _ ->
+      false
+;;
+
+
+
+(**********************************************************************)
+
+(* Run the test function [f] and print one line on stdout that names the
+ * test [n] and says whether [f ()] returned true. *)
+let test f n =
+  let verdict = if f() then " ok" else " FAILED!!!!" in
+  print_endline ("Test " ^ n ^ verdict);
+  flush stdout
+;;
+
+test t001 "001";;  (* scan_structured_value tests *)
+test t002 "002";;
+test t003 "003";;
+test t004 "004";;
+test t005 "005";;
+test t006 "006";;
+test t007 "007";;
+test t008 "008";;
+test t009 "009";;
+test t010 "010";;
+test t011 "011";;
+test t012 "012";;
+test t013 "013";;
+(* scan_structured_value tests added for netstring-0.9 *)
+test t020 "020";;
+test t021 "021";;
+test t022 "022";;
+test t023 "023";;
+test t024 "024";;
+test t025 "025";;
+(* token position tests that go through the local [scan] helper *)
+test t100 "100";;
+test t101 "101";;
+test t102 "102";;
+test t110 "110";;
+test t111 "111";;
+test t112 "112";;
+test t113 "113";;
+test t120 "120";;
+test t130 "130";;
+test t131 "131";;
+test t132 "132";;
+test t133 "133";;
+test t140 "140";;
+test t150 "150";;
+test t160 "160";;
+test t161 "161";;
+test t162 "162";;
diff --git a/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml b/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml
new file mode 100644
index 000000000..29673fa5f
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml
@@ -0,0 +1,223 @@
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+
+open Netencoding;;
+
+(**********************************************************************)
+(* Base64 *)
+(**********************************************************************)
+
+(* Test strings:
+ * "", "a", "ab", "abc", "abcd", "abcde",
+ * "abcdefghijklmnopqrstuvwxyz".
+ *)
+
+let t001() =
+  (* ENCODE without line breaks; the inputs cover 0, 1 and 2 padding characters. *)
+  Base64.encode "" = "" &&
+  Base64.encode "a" = "YQ==" &&
+  Base64.encode "ab" = "YWI=" &&
+  Base64.encode "abc" = "YWJj" &&
+  Base64.encode "abcd" = "YWJjZA==" &&
+  Base64.encode "abcde" = "YWJjZGU=" &&
+  Base64.encode "abcdefghijklmnopqrstuvwxyz" =
+    "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="
+;;
+
+
+let t002() =
+  (* ENCODE. Lines of 4 characters, each terminated by LF (last argument false). *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 4 false = "" &&
+  Base64.encode_substring abc 0 1 4 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 4 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 4 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 4 false = "YWJj\nZA==\n" &&
+  Base64.encode_substring abc 0 5 4 false = "YWJj\nZGU=\n" &&
+  Base64.encode_substring abc 0 26 4 false =
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+let t003() =
+  (* ENCODE. Line length 5: the expected output still breaks after 4 characters. *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 5 false = "" &&
+  Base64.encode_substring abc 0 1 5 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 5 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 5 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 5 false = "YWJj\nZA==\n" &&
+  Base64.encode_substring abc 0 5 5 false = "YWJj\nZGU=\n" &&
+  Base64.encode_substring abc 0 26 5 false =
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+let t004() =
+  (* ENCODE. Line length 7: the expected output still breaks after 4 characters. *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 7 false = "" &&
+  Base64.encode_substring abc 0 1 7 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 7 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 7 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 7 false = "YWJj\nZA==\n" &&
+  Base64.encode_substring abc 0 5 7 false = "YWJj\nZGU=\n" &&
+  Base64.encode_substring abc 0 26 7 false =
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+let t005() =
+  (* ENCODE. Line length 8: up to 8 characters per line, terminated by LF. *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 8 false = "" &&
+  Base64.encode_substring abc 0 1 8 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 8 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 8 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 8 false = "YWJjZA==\n" &&
+  Base64.encode_substring abc 0 5 8 false = "YWJjZGU=\n" &&
+  Base64.encode_substring abc 0 26 8 false =
+    "YWJjZGVm\nZ2hpamts\nbW5vcHFy\nc3R1dnd4\neXo=\n"
+;;
+
+
+let t006() =
+  (* ENCODE. Line length 8, lines terminated by CRLF (last argument true). *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 8 true = "" &&
+  Base64.encode_substring abc 0 1 8 true = "YQ==\r\n" &&
+  Base64.encode_substring abc 0 2 8 true = "YWI=\r\n" &&
+  Base64.encode_substring abc 0 3 8 true = "YWJj\r\n" &&
+  Base64.encode_substring abc 0 4 8 true = "YWJjZA==\r\n" &&
+  Base64.encode_substring abc 0 5 8 true = "YWJjZGU=\r\n" &&
+  Base64.encode_substring abc 0 26 8 true =
+    "YWJjZGVm\r\nZ2hpamts\r\nbW5vcHFy\r\nc3R1dnd4\r\neXo=\r\n"
+;;
+
+
+let t020() =
+  (* DECODE. Inputs without any whitespace; both flags are false. *)
+  Base64.decode_substring "" 0 0 false false = "" &&
+  Base64.decode_substring "YQ==" 0 4 false false = "a" &&
+  Base64.decode_substring "YWI=" 0 4 false false = "ab" &&
+  Base64.decode_substring "YWJj" 0 4 false false = "abc" &&
+  Base64.decode_substring "YWJjZA==" 0 8 false false = "abcd" &&
+  Base64.decode_substring "YWJjZGU=" 0 8 false false = "abcde" &&
+  Base64.decode_substring
+    "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" 0 36 false false =
+    "abcdefghijklmnopqrstuvwxyz"
+;;
+
+
+let t021() =
+  (* DECODE. Last flag true: blanks, tabs and line breaks in the input are accepted. *)
+  Base64.decode_substring " \r\n\t" 0 4 false true = "" &&
+  Base64.decode_substring " Y W J j\n Z G U = " 0 18 false true = "abcde"
+;;
+
+
+let t022() =
+  (* DECODE. Both flags true: the padding may be written as '.' as well as '='. *)
+  Base64.decode_substring " Y W J j\n Z G U = " 0 18 true true = "abcde" &&
+  Base64.decode_substring " Y W J j\n Z G U . " 0 18 true true = "abcde"
+;;
+
+(**********************************************************************)
+(* Quoted Printable *)
+(**********************************************************************)
+
+let t100() =
+  (* ENCODE. Line breaks are kept; a blank before one or at the very end becomes =20. *)
+  QuotedPrintable.encode "a %= 12345 &$[]\"" = "a %=3D 12345 &=24=5B=5D=22" &&
+  QuotedPrintable.encode "\000\001\002" = "=00=01=02" &&
+  QuotedPrintable.encode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &&
+  QuotedPrintable.encode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &&
+  QuotedPrintable.encode "abc \r\n def\nghi " = "abc=20\r\n def\nghi=20"
+;;
+
+
+let t120() =
+  (* DECODE. Last case: an = sign directly before CRLF is dropped with the line break. *)
+  QuotedPrintable.decode "a %=3D 12345 &=24=5B=5D=22" = "a %= 12345 &$[]\"" &&
+  QuotedPrintable.decode "=00=01=02" = "\000\001\002" &&
+  QuotedPrintable.decode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &&
+  QuotedPrintable.decode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &&
+  QuotedPrintable.decode "abc=20\r\n def\nghi=20" = "abc \r\n def\nghi " &&
+  QuotedPrintable.decode "abc=\r\n def\nghi=20" = "abc def\nghi "
+;;
+
+(**********************************************************************)
+(* Q *)
+(**********************************************************************)
+
+let t200() =
+  (* ENCODE. In Q encoding blanks, CR and LF are escaped as well. *)
+  Q.encode "a %= 12345 &$[]\"" = "a=20=25=3D=2012345=20=26=24=5B=5D=22" &&
+  Q.encode "\000\001\002\r\n" = "=00=01=02=0D=0A"
+;;
+
+
+let t220() =
+  (* DECODE. The last case uses lowercase hex digits and expects the same result. *)
+  Q.decode "a=20=25=3D=2012345=20=26=24=5B=5D=22" = "a %= 12345 &$[]\"" &&
+  Q.decode "=00=01=02=0D=0A" = "\000\001\002\r\n" &&
+  Q.decode "a=20=25=3d=2012345=20=26=24=5b=5d=22" = "a %= 12345 &$[]\""
+;;
+
+(**********************************************************************)
+(* Url *)
+(**********************************************************************)
+
+(* Already tested for Cgi *)
+
+(**********************************************************************)
+(* Html *)
+(**********************************************************************)
+
+(* ENCODE.  NOTE(review): the entity references of the expected string are
+ * reconstructed (named vs. numeric is a best guess) -- confirm upstream. *)
+let t300() = Html.encode_from_latin1 "<>&\"abcdef\228\246\220\160\025'" =
+  "&lt;&gt;&amp;&quot;abcdef&auml;&ouml;&Uuml;&nbsp;&#25;'" ;;
+
+
+(* DECODE.  NOTE(review): the entity references in the inputs are
+ * reconstructed (named vs. numeric is a best guess) -- confirm upstream. *)
+let t320() =
+  Html.decode_to_latin1 "&lt;&gt;&amp;&quot;abcdef&auml;&ouml;&Uuml;&nbsp;&#25;"
+    = "<>&\"abcdef\228\246\220\160\025" &&
+  Html.decode_to_latin1 "&#39;" = "'" &&
+  Html.decode_to_latin1 "&nonsense;" = "&nonsense;" &&  (* unknown name: kept *)
+  Html.decode_to_latin1 "&#256;" = "&#256;" ;;  (* beyond Latin-1: kept *)
+
+
+(**********************************************************************)
+
+(* Evaluate [f ()] and report the outcome for the test named [n] as one
+ * line on stdout, which is flushed right away. *)
+let test f n =
+  let passed = f() in
+  print_endline ("Test " ^ n ^ (if passed then " ok" else " FAILED!!!!"));
+  flush stdout
+;;
+
+test t001 "001";  (* Base64 encoding *)
+test t002 "002";
+test t003 "003";
+test t004 "004";
+test t005 "005";
+test t006 "006";
+(* Base64 decoding *)
+test t020 "020";
+test t021 "021";
+test t022 "022";
+(* Quoted Printable *)
+test t100 "100";
+test t120 "120";
+(* Q encoding *)
+test t200 "200";
+test t220 "220";
+(* Html *)
+test t300 "300";
+test t320 "320";
diff --git a/helm/DEVEL/pxp/netstring/tests/test_neturl.ml b/helm/DEVEL/pxp/netstring/tests/test_neturl.ml
new file mode 100644
index 000000000..633bfda09
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/test_neturl.ml
@@ -0,0 +1,969 @@
+#directory "..";;
+#load "netstring.cma";;
+
+open Neturl;;
+
+
+let expect_malformed_url f = (* true iff [f ()] raises Malformed_URL *)
+  try let _ = f () in false with Malformed_URL -> true ;;
+
+(* True iff [f ()] does not raise Malformed_URL (other exceptions propagate). *)
+let works f =
+  if expect_malformed_url f then false else true ;;
+
+(**********************************************************************)
+(* extract_url_scheme *)
+(**********************************************************************)
+
+(* The scheme is the text before the first ':' and is reported in lowercase. *)
+let t001 () =
+  List.for_all
+    (fun (url, scheme) -> extract_url_scheme url = scheme)
+    [ "a:bc", "a";  "A:bc", "a";  "a:b:c", "a";  "a+b-c:d:e", "a+b-c" ]
+;;
+
+
+(* Inputs without a valid scheme prefix must raise Malformed_URL: no colon
+ * at all, a slash before the colon, or percent-escapes in the prefix. *)
+let t002 () =
+  let rejected s =
+    try let _ = extract_url_scheme s in false
+    with Malformed_URL -> true
+  in
+  List.for_all rejected [ "a"; "a/b:c"; "%61:b"; "a%3ab" ]
+;;
+
+(**********************************************************************)
+(* url_syntax *)
+(**********************************************************************)
+
+(* True iff [f key value] holds for all bindings of [h]; once one binding
+ * has failed, [f] is not called for the remaining ones. *)
+let hashtbl_for_all f h =
+  let all_ok = ref true in
+  let check key value = if !all_ok then all_ok := f key value in
+  Hashtbl.iter check h;
+  !all_ok ;;
+
+(* Every predefined syntax must pass url_syntax_is_valid:
+ * null_url_syntax, ip_url_syntax and each entry of the
+ * common_url_syntax table. *)
+let t010 () =
+  let entry_ok _ syntax = url_syntax_is_valid syntax in
+  url_syntax_is_valid null_url_syntax
+  && url_syntax_is_valid ip_url_syntax
+  && hashtbl_for_all entry_ok common_url_syntax
+;;
+
+(* The partial variant of every predefined syntax must be valid, too:
+ * null_url_syntax, ip_url_syntax and each entry of common_url_syntax
+ * are passed through partial_url_syntax first. *)
+let t011 () =
+  let partial_ok syntax = url_syntax_is_valid (partial_url_syntax syntax) in
+  partial_ok null_url_syntax
+  && partial_ok ip_url_syntax
+  && hashtbl_for_all (fun _ syntax -> partial_ok syntax) common_url_syntax
+;;
+
+(* partial_url_syntax: Url_part_required becomes Url_part_allowed, all else
+ * is unchanged and url_is_valid stays physically the same function. *)
+let t012 () =
+  let accept_all = fun _ -> true in
+  let strict =
+    { url_enable_scheme = Url_part_not_recognized;
+      url_enable_user = Url_part_required;
+      url_enable_password = Url_part_allowed;
+      url_enable_host = Url_part_required;
+      url_enable_port = Url_part_not_recognized;
+      url_enable_path = Url_part_required;
+      url_enable_param = Url_part_not_recognized;
+      url_enable_query = Url_part_not_recognized;
+      url_enable_fragment = Url_part_required;
+      url_enable_other = Url_part_not_recognized;
+      url_accepts_8bits = false;
+      url_is_valid = accept_all;
+    } in
+  let relaxed = partial_url_syntax strict in
+  relaxed.url_enable_scheme = Url_part_not_recognized
+  && relaxed.url_enable_user = Url_part_allowed
+  && relaxed.url_enable_password = Url_part_allowed
+  && relaxed.url_enable_host = Url_part_allowed
+  && relaxed.url_enable_port = Url_part_not_recognized
+  && relaxed.url_enable_path = Url_part_allowed
+  && relaxed.url_enable_param = Url_part_not_recognized
+  && relaxed.url_enable_query = Url_part_not_recognized
+  && relaxed.url_enable_fragment = Url_part_allowed
+  && relaxed.url_enable_other = Url_part_not_recognized
+  && relaxed.url_is_valid == accept_all
+  && url_syntax_is_valid strict
+  && url_syntax_is_valid relaxed
+;;
+
+(**********************************************************************)
+(* make_url *)
+(**********************************************************************)
+
+(* make_url from unencoded components (the default), then read everything
+ * back both decoded and ~encoded, and check the printed form. *)
+let t020 () =
+  let syntax = Hashtbl.find common_url_syntax "http" in
+  let url =
+    make_url
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"%()~$@"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"?";""]
+      syntax
+  in
+  (* the six components given above are provided ... *)
+  url_provides
+    ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
+    url
+  (* ... but a query is not *)
+  && not
+       (url_provides
+          ~scheme:true ~user:true ~password:true ~host:true ~port:true
+          ~path:true ~query:true url)
+  (* the very same syntax record is returned *)
+  && url_syntax_of_url url == syntax
+  (* decoded accessors *)
+  && url_scheme url = "http"
+  && url_user url = "U"
+  && url_password url = "%()~$@"
+  && url_host url = "a.b.c"
+  && url_port url = 81
+  && url_path url = ["";"?";""]
+  (* encoded accessors and printed form *)
+  && url_user ~encoded:true url = "U"
+  && url_password ~encoded:true url = "%25()%7E$%40"
+  && url_path ~encoded:true url = ["";"%3F";""]
+  && string_of_url url = "http://U:%25()%7E$%40@a.b.c:81/%3F/"
+;;
+
+
+(* make_url from already encoded components (~encoded:true): decoded
+ * accessors decode them, encoded accessors return them as given. *)
+let t021 () =
+  let syntax = Hashtbl.find common_url_syntax "http" in
+  let url =
+    make_url ~encoded:true
+      ~scheme:"http"
+      ~user:"%55"
+      ~password:"%25()%7e$%40"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"%3F";""]
+      syntax
+  in
+  (* the six components given above are provided ... *)
+  url_provides
+    ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
+    url
+  (* ... but a query is not *)
+  && not
+       (url_provides
+          ~scheme:true ~user:true ~password:true ~host:true ~port:true
+          ~path:true ~query:true url)
+  (* the very same syntax record is returned *)
+  && url_syntax_of_url url == syntax
+  (* decoded accessors *)
+  && url_scheme url = "http"
+  && url_user url = "U"
+  && url_password url = "%()~$@"
+  && url_host url = "a.b.c"
+  && url_port url = 81
+  && url_path url = ["";"?";""]
+  (* encoded accessors and printed form keep the original spelling *)
+  && url_user ~encoded:true url = "%55"
+  && url_password ~encoded:true url = "%25()%7e$%40"
+  && url_path ~encoded:true url = ["";"%3F";""]
+  && string_of_url url = "http://%55:%25()%7e$%40@a.b.c:81/%3F/"
+;;
+
+
+(* NEGATIVE TESTS *)
+
+(* It is not possible to add a component which is not recognized: here
+ * a ~fragment is passed together with the http syntax. *)
+let t030 () =
+  let syntax = Hashtbl.find common_url_syntax "http" in
+  let build () =
+    make_url
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"%()~$@"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"?";""]
+      ~fragment:"abc"
+      syntax
+  in
+  expect_malformed_url build ;;
+
+
+let t031 () =
+ (* It is not possible to put malformed '%'-encodings into the URL *)
+ let http_syn = Hashtbl.find common_url_syntax "http" in
+ (* All four URLs are built with ~encoded:true and differ only in ~password. *)
+ works (* reference *)
+ (fun () ->
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"XX"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ http_syn) &&
+ (* a percent sign followed by two characters that are not hex digits *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"%XX"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ http_syn) &&
+ (* a percent sign followed by a single character *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"%X"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ http_syn) &&
+ (* a percent sign with nothing after it *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"%"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ http_syn)
+;;
+
+let t032 () =
+ (* It is not possible to put unsafe characters into the URL *)
+ let http_syn = Hashtbl.find common_url_syntax "http" in
+ (* [make c] builds an encoded URL whose password is the single character c. *)
+ let make c =
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:(String.make 1 c)
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ http_syn
+ in
+
+ works (fun () -> make 'a') && (* reference *)
+
+ (* List of unsafe characters taken from RFC1738: *)
+ expect_malformed_url (fun () -> make '<') &&
+ expect_malformed_url (fun () -> make '>') &&
+ expect_malformed_url (fun () -> make '"') &&
+ expect_malformed_url (fun () -> make '#') &&
+ (* Note: '#' would be considered as reserved if fragments were enabled *)
+ expect_malformed_url (fun () -> make '%') &&
+ expect_malformed_url (fun () -> make '{') &&
+ expect_malformed_url (fun () -> make '}') &&
+ expect_malformed_url (fun () -> make '|') &&
+ expect_malformed_url (fun () -> make '\\') &&
+ expect_malformed_url (fun () -> make '^') &&
+ expect_malformed_url (fun () -> make '[') &&
+ expect_malformed_url (fun () -> make ']') &&
+ expect_malformed_url (fun () -> make '`') &&
+ expect_malformed_url (fun () -> make '~') &&
+ (* Note: '~' is considered as safe in paths: *)
+ works
+ (fun () ->
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"a"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"~";""]
+ http_syn)
+;;
+
+let t033 () =
+ (* It is not possible to put reserved characters into the URL *)
+ let http_syn = Hashtbl.find common_url_syntax "http" in
+ (* Three builders put one character into the password, a path component or the query of an encoded URL. *)
+ let make_password c =
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:(String.make 1 c)
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ http_syn
+ in
+ let make_path c =
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"a"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";String.make 1 c;""]
+ http_syn
+ in
+ let make_query c =
+ make_url
+ ~encoded:true
+ ~scheme:"http"
+ ~user:"U"
+ ~password:"a"
+ ~host:"a.b.c"
+ ~port:81
+ ~path:["";"a";""]
+ ~query:(String.make 1 c)
+ http_syn
+ in
+
+ (* Note: There is a difference between RFC 1738 and RFC 1808 regarding
+ * which characters are reserved. RFC 1808 defines a fixed set of characters
+ * as reserved while RFC 1738 defines the reserved characters depending
+ * on the scheme.
+ * This implementation of URLs follows RFC 1738 (because of practical
+ * reasons).
+ *)
+
+ works (fun () -> make_password 'a') && (* reference *)
+ works (fun () -> make_path 'a') &&
+ works (fun () -> make_query 'a') &&
+ (* password: ':' '@' '/' must be rejected, the other four are accepted *)
+ expect_malformed_url (fun () -> make_password ':') &&
+ expect_malformed_url (fun () -> make_password '@') &&
+ expect_malformed_url (fun () -> make_password '/') &&
+ works (fun () -> make_password ';') &&
+ works (fun () -> make_password '?') &&
+ works (fun () -> make_password '=') &&
+ works (fun () -> make_password '&') &&
+
+ (* Note: ';' is allowed in path and query because parameters are not
+ * recognized in HTTP syntax.
+ *)
+ (* path component: '/' and '?' must be rejected *)
+ expect_malformed_url (fun () -> make_path '/') &&
+ expect_malformed_url (fun () -> make_path '?') &&
+ works (fun () -> make_path ':') &&
+ works (fun () -> make_path '@') &&
+ works (fun () -> make_path ';') &&
+ works (fun () -> make_path '=') &&
+ works (fun () -> make_path '&') &&
+ (* query: only '?' must be rejected *)
+ expect_malformed_url (fun () -> make_query '?') &&
+ works (fun () -> make_query '/') &&
+ works (fun () -> make_query ':') &&
+ works (fun () -> make_query '@') &&
+ works (fun () -> make_query ';') &&
+ works (fun () -> make_query '=') &&
+ works (fun () -> make_query '&')
+;;
+
+
+let t034 () =
+ (* It is not possible to create a URL with a password, but without user;
+ * and neither to create a URL with a port, but without host;
+ * and neither to create a URL with a user, but without host
+ *)
+ (* password without user *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:"http"
+ ~password:"a"
+ ~host:"a.b.c"
+ ~path:["";"a";""]
+ ip_url_syntax) &&
+ (* user without host *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:"http"
+ ~user:"U"
+ ~path:["";"a";""]
+ ip_url_syntax) &&
+ (* port without host *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:"http"
+ ~port:81
+ ~path:["";"a";""]
+ ip_url_syntax)
+;;
+
+
+let t035 () =
+ (* It is not possible to create a URL with illegal scheme prefix *)
+
+ (* reference: *)
+ works
+ (fun () ->
+ make_url
+ ~scheme:"a"
+ ip_url_syntax) &&
+ (* the scheme consists of a colon only *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:":"
+ ip_url_syntax) &&
+ (* '=' inside the scheme *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:"a=b"
+ ip_url_syntax) &&
+ (* percent-escape inside the scheme *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:"a%62b"
+ ip_url_syntax) &&
+ (* '&' inside the scheme *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~scheme:"a&b"
+ ip_url_syntax)
+;;
+
+
+let t036 () =
+ (* It is not possible to have a path with double slashes *)
+
+ (* reference: *)
+ works
+ (fun () ->
+ make_url
+ ~path:["";"a";""]
+ ip_url_syntax) &&
+ (* nothing but two empty components *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~path:["";""]
+ ip_url_syntax) &&
+ (* two empty components at the end *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~path:["a";"";""]
+ ip_url_syntax) &&
+ (* two empty components at the beginning *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~path:["";"";"a"]
+ ip_url_syntax) &&
+ (* an empty component in the middle *)
+ expect_malformed_url
+ (fun () ->
+ make_url
+ ~path:["a";"";"a"]
+ ip_url_syntax)
+;;
+
+
+let t037 () =
+  (* It is not possible to have port numbers outside 0..65535.
+   * Port 1 is the reference case that must be accepted; -1 and 65536
+   * lie just outside the range and must be rejected.
+   *)
+  let with_port p () =
+    make_url
+      ~host:"a"
+      ~port:p
+      ip_url_syntax
+  in
+  works (with_port 1)
+  && expect_malformed_url (with_port (-1))
+  && expect_malformed_url (with_port 65536)
+;;
+
+
+let t038 () =
+  (* Several combinations which are not allowed; each thunk is named
+   * after the illegal combination it tries to build.
+   *)
+  let host_with_relative_path () =
+    make_url
+      ~host:"a"
+      ~path:["a"]
+      ip_url_syntax
+  and host_with_params_but_no_path () =
+    make_url
+      ~host:"a"
+      ~path:[]
+      ~param:["x"]
+      ip_url_syntax
+  and host_with_query_but_no_path () =
+    make_url
+      ~host:"a"
+      ~path:[]
+      ~query:"x"
+      ip_url_syntax
+  in
+  List.for_all
+    expect_malformed_url
+    [ host_with_relative_path;
+      host_with_params_but_no_path;
+      host_with_query_but_no_path;
+    ]
+;;
+
+(**********************************************************************)
+(* url_of_string *)
+(**********************************************************************)
+
+let t050 () =
+  (* absolute URLs with ip_url_syntax *)
+
+  (* [round_trips s]: parsing s and printing the result gives s again. *)
+  let round_trips s =
+    string_of_url (url_of_string ip_url_syntax s) = s in
+
+  (* [rejected s]: parsing s raises Malformed_URL. *)
+  let rejected s =
+    try ignore(url_of_string ip_url_syntax s); false
+    with Malformed_URL -> true
+  in
+
+  (* Every check below except the two scheme-only ones is applied to
+   * "http://" ^ authority ^ rest for each of these authority parts, in
+   * this order.
+   *)
+  let authorities =
+    [ "host";
+      "user@host";
+      "user:password@host";
+      "user@host:99";
+      "user:password@host:99";
+    ]
+  in
+  let for_all_authorities check rest =
+    List.for_all
+      (fun authority -> check ("http://" ^ authority ^ rest))
+      authorities
+  in
+
+  round_trips "http:" &&
+
+  for_all_authorities round_trips "" &&
+  for_all_authorities round_trips "/" &&
+  for_all_authorities round_trips "/a/b" &&
+  for_all_authorities round_trips "/a/b/" &&
+  for_all_authorities round_trips "/?a=b&c=d" &&
+
+  (* a query directly after the authority, without a path, is illegal *)
+  for_all_authorities rejected "?a=b&c=d" &&
+
+  for_all_authorities round_trips "/?a=/&c=/" &&
+  for_all_authorities round_trips "/;a;b" &&
+
+  (* params directly after the authority, without a path, are illegal *)
+  for_all_authorities rejected ";a;b" &&
+
+  for_all_authorities round_trips "/;a;b?a=b&c=d" &&
+
+  round_trips "http:#f" &&
+
+  for_all_authorities round_trips "#f" &&
+  for_all_authorities round_trips "/;a;b?a=b&c=d#f"
+;;
+
+
+let t051 () =
+  (* relative URLs with ip_url_syntax
+   *
+   * Every URL that is checked must survive the round trip
+   * url_of_string / string_of_url unchanged. The URLs are built as
+   * prefix ^ suffix from the tables below. (This test has no negative
+   * cases, so it defines no helper for expected parse failures.)
+   *)
+  let identical s =
+    string_of_url (url_of_string ip_url_syntax s) = s in
+
+  (* Checks prefix ^ suffix for all combinations. The suffix varies in
+   * the outer loop and the prefix in the inner loop; evaluation stops
+   * at the first URL that does not round-trip.
+   *)
+  let round_trips prefixes suffixes =
+    List.for_all
+      (fun suffix ->
+         List.for_all
+           (fun prefix -> identical (prefix ^ suffix))
+           prefixes)
+      suffixes
+  in
+
+  (* Network-path references: authority part without scheme. *)
+  let authorities =
+    [ "//host";
+      "//user@host";
+      "//user:password@host";
+      "//user@host:99";
+      "//user:password@host:99";
+    ]
+  in
+
+  (* Combinations of params, query and fragment appended to a path. *)
+  let tails =
+    [ "";
+      ";a;b";
+      "?a=b&c=d";
+      ";a;b?a=b&c=d";
+      "#f";
+      ";a;b#f";
+      ";a;b?a=b&c=d#f";
+    ]
+  in
+
+  round_trips authorities [ ""; "/"; "#f" ] &&
+
+  (* absolute paths *)
+  round_trips [ "/"; "/a"; "/a/"; "/a/a" ] tails &&
+
+  (* relative paths, including the empty one *)
+  round_trips [ ""; "a"; "a/"; "a/a" ] tails &&
+
+  (* paths starting with "." *)
+  round_trips [ "."; "./"; "./a" ] tails &&
+
+  (* paths starting with ".." *)
+  round_trips [ ".."; "../"; "../a" ] tails &&
+
+  (* A colon in the first component of a relative path: printed as %3A
+   * when the component is given unencoded, and printed behind "./" when
+   * it is given with ~encoded:true.
+   *)
+  string_of_url
+    (make_url ~path:["a:b"] ip_url_syntax) = "a%3Ab" &&
+
+  string_of_url
+    (make_url ~encoded:true ~path:["a:b"] ip_url_syntax) = "./a:b"
+;;
+
+
+let t052 () =
+  (* mailto: URLs, parsed with the syntax registered under "mailto" in
+   * common_url_syntax (Hashtbl.find raises Not_found if it is missing).
+   *)
+  let syntax = Hashtbl.find common_url_syntax "mailto" in
+  let parse s = url_of_string syntax s in
+
+  (* parse + print gives the input back *)
+  let round_trips s = string_of_url (parse s) = s in
+
+  (* parsing raises Malformed_URL *)
+  let rejected s =
+    try ignore(parse s); false
+    with Malformed_URL -> true
+  in
+
+  List.for_all round_trips [ "mailto:user@host"; "mailto:user@host;?;?" ]
+  && rejected "mailto:user@host#f"
+;;
+
+(**********************************************************************)
+(* split_path/join_path/norm_path: *)
+(**********************************************************************)
+
+let t060 () =
+  (* split_path and join_path must be inverse to each other on the
+   * following (path string, component list) pairs: first every string
+   * is split and compared with its list, then every list is joined and
+   * compared with its string.
+   *)
+  let cases =
+    [ "",       [];
+      "/",      [ "" ];
+      "/a",     [ ""; "a" ];
+      "a",      [ "a" ];
+      "a/",     [ "a"; "" ];
+      "/a/",    [ ""; "a"; "" ];
+      "/a/b",   [ ""; "a"; "b" ];
+      "/a/b/",  [ ""; "a"; "b"; "" ];
+      "/a/b/c", [ ""; "a"; "b"; "c" ];
+    ]
+  in
+  List.for_all (fun (s, components) -> split_path s = components) cases
+  && List.for_all (fun (s, components) -> join_path components = s) cases
+;;
+
+
+let t061 () =
+  (* norm_path: each pair is (input path, expected normalized path). *)
+  let cases =
+    [ ["."],                            [];
+      ["."; ""],                        [];
+      ["a"; "."],                       ["a"; ""];
+      ["a"; "b"; "."],                  ["a"; "b"; ""];
+      ["a"; "b"; ".."],                 ["a"; ""];
+      ["a"; "."; "b"; "."],             ["a"; "b"; ""];
+      [".."],                           [".."; ""];
+      [".."; ""],                       [".."; ""];
+      ["a"; "b"; ".."; "c" ],           ["a"; "c"];
+      ["a"; "b"; ".."; "c"; ""],        ["a"; "c"; ""];
+      ["";"";"a";"";"b"],               [""; "a"; "b"];
+      ["a"; "b"; ""; ".."; "c"; ""],    ["a"; "c"; ""];
+      ["a"; ".."],                      [];
+      ["";""],                          [""];
+      [""],                             [""];
+      [],                               [];
+    ]
+  in
+  List.for_all (fun (input, expected) -> norm_path input = expected) cases
+;;
+
+(**********************************************************************)
+(* apply_relative_url: *)
+(**********************************************************************)
+
+let t070() =
+  (* Examples taken from RFC 1808: every relative URL is resolved
+   * against the base URL http://a/b/c/d;p?q#f and compared with the
+   * expected absolute URL.
+   *)
+  let url = url_of_string ip_url_syntax in
+  let base = url "http://a/b/c/d;p?q#f" in
+  let resolves_to (relative, expected) =
+    apply_relative_url base (url relative) = url expected in
+
+  List.for_all
+    resolves_to
+    [ (* normal examples *)
+      "g:h",            "g:h";
+      "g",              "http://a/b/c/g";
+      "./g",            "http://a/b/c/g";
+      "g/",             "http://a/b/c/g/";
+      "/g",             "http://a/g";
+      "//g",            "http://g";
+      "?y",             "http://a/b/c/d;p?y";
+      "g?y",            "http://a/b/c/g?y";
+      "g?y/./x",        "http://a/b/c/g?y/./x";
+      "#s",             "http://a/b/c/d;p?q#s";
+      "g#s",            "http://a/b/c/g#s";
+      "g#s/./x",        "http://a/b/c/g#s/./x";
+      "g?y#s",          "http://a/b/c/g?y#s";
+      ";x",             "http://a/b/c/d;x";
+      "g;x",            "http://a/b/c/g;x";
+      "g;x?y#s",        "http://a/b/c/g;x?y#s";
+      ".",              "http://a/b/c/";
+      "./",             "http://a/b/c/";
+      "..",             "http://a/b/";
+      "../",            "http://a/b/";
+      "../g",           "http://a/b/g";
+      "../..",          "http://a/";
+      "../../",         "http://a/";
+      "../../g",        "http://a/g";
+
+      (* abnormal examples *)
+      "",               "http://a/b/c/d;p?q#f";
+      "../../../g",     "http://a/../g";
+      "../../../../g",  "http://a/../../g";
+      "/./g",           "http://a/./g";
+      "/../g",          "http://a/../g";
+      "g.",             "http://a/b/c/g.";
+      ".g",             "http://a/b/c/.g";
+      "g..",            "http://a/b/c/g..";
+      "..g",            "http://a/b/c/..g";
+      "./../g",         "http://a/b/g";
+      "./g/.",          "http://a/b/c/g/";
+      "g/./h",          "http://a/b/c/g/h";
+      "g/../h",         "http://a/b/c/h";
+      "http:g",         "http:g";
+      "http:",          "http:";
+    ]
+;;
+
+
+(**********************************************************************)
+
+let test f n =
+  (* Runs the test function f and prints one result line for the test
+   * named n: "Test <n> ok" if f returns true, "Test <n> FAILED!!!!" if
+   * it returns false.
+   *
+   * An exception escaping from f is also reported as a failure of this
+   * test, together with the printed exception, so that one broken test
+   * does not abort the tests that follow it.
+   *)
+  let verdict =
+    try
+      if f() then " ok" else " FAILED!!!!"
+    with
+      e -> " FAILED!!!! (uncaught exception: " ^ Printexc.to_string e ^ ")"
+  in
+  print_endline ("Test " ^ n ^ verdict);
+  flush stdout
+;;
+
+(* Run all tests in order; each entry is (test function, printed name). *)
+List.iter
+  (fun (f, n) -> test f n)
+  [ t001, "001";
+    t002, "002";
+
+    t010, "010";
+    t011, "011";
+    t012, "012";
+
+    t020, "020";
+    t021, "021";
+
+    t030, "030";
+    t031, "031";
+    t032, "032";
+    t033, "033";
+    t034, "034";
+    t035, "035";
+    t036, "036";
+    t037, "037";
+    t038, "038";
+
+    t050, "050";
+    t051, "051";
+    t052, "052";
+
+    t060, "060";
+    t061, "061";
+
+    t070, "070";
+  ]
+;;
diff --git a/helm/DEVEL/pxp/netstring/tests/test_recode.ml b/helm/DEVEL/pxp/netstring/tests/test_recode.ml
new file mode 100644
index 000000000..64a04caae
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tests/test_recode.ml
@@ -0,0 +1,169 @@
+
+
+let make_iso enc =
+  (* Concatenates, for the codes 0 to 255 in ascending order, the
+   * strings returned by Netconversion.makechar for the encoding enc.
+   * Codes for which makechar raises Not_found contribute nothing.
+   *)
+  let acc = Buffer.create 256 in
+  for code = 0 to 255 do
+    try
+      Buffer.add_string
+        acc
+        (Netconversion.makechar (enc :> Netconversion.encoding) code)
+    with
+      Not_found -> ()
+  done;
+  Buffer.contents acc
+;;
+
+let make_ucs2 start stop =
+  (* Returns a string that holds the code points start .. stop-1, two
+   * bytes per code point, high-order byte first.
+   *)
+  let count = stop - start in
+  let s = String.create (count * 2) in
+  for code = start to stop - 1 do
+    let pos = 2 * (code - start) in
+    s.[pos] <- Char.chr(code lsr 8);
+    s.[pos+1] <- Char.chr(code land 0xff)
+  done;
+  s
+;;
+
+let make_ucs4 start stop =
+  (* Returns a string that holds the code points start .. stop-1, four
+   * bytes per code point, highest-order byte first.
+   *)
+  let count = stop - start in
+  let s = String.create (count * 4) in
+  for code = start to stop - 1 do
+    let pos = 4 * (code - start) in
+    s.[pos] <- Char.chr(code lsr 24);
+    s.[pos+1] <- Char.chr((code lsr 16) land 0xff);
+    s.[pos+2] <- Char.chr((code lsr 8) land 0xff);
+    s.[pos+3] <- Char.chr(code land 0xff)
+  done;
+  s
+;;
+
+(* Maps an encoding tag to the encoding name that is passed to the
+ * iconv command.
+ * Note: GNU-iconv assumes big endian byte order.
+ *)
+let name_of_encoding =
+  function
+      `Enc_iso88591  -> "ISO_8859-1"
+    | `Enc_iso88592  -> "ISO_8859-2"
+    | `Enc_iso88593  -> "ISO_8859-3"
+    | `Enc_iso88594  -> "ISO_8859-4"
+    | `Enc_iso88595  -> "ISO_8859-5"
+    | `Enc_iso88596  -> "ISO_8859-6"
+    | `Enc_iso88597  -> "ISO_8859-7"
+    | `Enc_iso88598  -> "ISO_8859-8"
+    | `Enc_iso88599  -> "ISO_8859-9"
+    | `Enc_iso885910 -> "ISO_8859-10"
+    | `Enc_iso885913 -> "ISO_8859-13"
+    | `Enc_iso885914 -> "ISO_8859-14"
+    | `Enc_iso885915 -> "ISO_8859-15"
+    | `Enc_utf8      -> "UTF-8"
+    | `Enc_ucs4      -> "UCS-4"
+    | `Enc_ucs2      -> "UCS-2"
+    | `Enc_utf16     -> "UTF-16"
+;;
+
+let iconv_recode_string in_enc out_enc in_s =
+  (* Converts in_s from the encoding in_enc to the encoding out_enc by
+   * piping it through the external command
+   *   iconv -f <name of in_enc> -t <name of out_enc>
+   * and returns everything the command writes to its standard output.
+   * The exit status of the command is ignored.
+   *)
+  let command =
+    "iconv -f " ^ name_of_encoding in_enc ^ " -t " ^ name_of_encoding out_enc
+  in
+  (* out_ch reads the output of the process, in_ch feeds its input. *)
+  let out_ch,in_ch = Unix.open_process command in
+
+  (* Write in_s to in_ch in a new thread (presumably so that writing and
+   * reading can overlap and neither pipe fills up while the other end
+   * waits). Closing in_ch signals the end of the input to the process.
+   *)
+  let writer =
+    Thread.create
+      (fun () ->
+         output_string in_ch in_s;
+         close_out in_ch
+      )
+      ()
+  in
+
+  (* Read the result in the current thread, 1024 bytes at a time, until
+   * input reports end of file by returning 0. The chunks are collected
+   * in a Buffer instead of being appended with ^, which would copy the
+   * whole result again for every chunk.
+   *)
+  let result = Buffer.create (String.length in_s) in
+  let buf = String.create 1024 in
+  let n = ref 1 in
+  while !n <> 0 do
+    n := input out_ch buf 0 1024;
+    Buffer.add_substring result buf 0 !n
+  done;
+
+  (* Make sure the writer is done with in_ch before close_process closes
+   * both channels.
+   *)
+  Thread.join writer;
+  ignore(Unix.close_process (out_ch,in_ch));
+  Buffer.contents result
+;;
+
+let test_iso_and_utf8 enc =
+  (* Converts the string built by make_iso for enc to UTF-8 and back,
+   * once with Netconversion.recode_string and once with iconv.
+   * Asserts that both agree in each direction and that the way back
+   * reproduces the original string.
+   *)
+  print_string ("Recode: " ^ name_of_encoding enc ^ " and UTF-8... ");
+  flush stdout;
+  let original = make_iso enc in
+
+  (* enc -> UTF-8 *)
+  let utf8_net =
+    Netconversion.recode_string
+      (enc :> Netconversion.encoding) `Enc_utf8 original in
+  let utf8_iconv = iconv_recode_string enc `Enc_utf8 original in
+  assert(utf8_net = utf8_iconv);
+
+  (* UTF-8 -> enc *)
+  let back_net =
+    Netconversion.recode_string
+      `Enc_utf8 (enc :> Netconversion.encoding) utf8_net in
+  let back_iconv = iconv_recode_string `Enc_utf8 enc utf8_net in
+  assert(back_net = back_iconv && back_net = original);
+
+  print_endline "OK";
+  flush stdout
+;;
+
+let test_utf16_and_utf8_0000_d7ff () =
+  (* Round trip UTF-16-BE -> UTF-8 -> UTF-16-BE for the code points
+   * 0x0000 .. 0xD7FF, compared between Netconversion and iconv.
+   *)
+  print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... ";
+  flush stdout;
+  let utf16 = make_ucs2 0 0xd800 in
+
+  let utf8_net = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
+  let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
+  assert(utf8_net = utf8_iconv);
+
+  let back_net =
+    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_net in
+  let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_net in
+  assert(back_net = back_iconv && back_net = utf16);
+
+  print_endline "OK";
+  flush stdout
+;;
+
+let test_utf16_and_utf8_e000_fffd () =
+  (* Round trip UTF-16-BE -> UTF-8 -> UTF-16-BE for the code points
+   * 0xE000 .. 0xFFFD, compared between Netconversion and iconv.
+   *)
+  print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... ";
+  flush stdout;
+  let utf16 = make_ucs2 0xe000 0xfffe in
+
+  let utf8_net = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
+  let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
+  assert(utf8_net = utf8_iconv);
+
+  let back_net =
+    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_net in
+  let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_net in
+  assert(back_net = back_iconv && back_net = utf16);
+
+  print_endline "OK";
+  flush stdout
+;;
+
+let test_utf16_and_utf8_10000_10FFFF () =
+  (* Round trip UTF-16-BE -> UTF-8 -> UTF-16-BE for the code points
+   * 0x10000 .. 0x10FFFF, processed in 16 blocks of 0x10000 code points.
+   * The UTF-16 input of each block is produced by iconv from UCS-4.
+   * A "+" is printed after every block.
+   *)
+  print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... ";
+  flush stdout;
+  for block = 1 to 16 do
+    let first = block * 0x10000 in
+    let ucs4 = make_ucs4 first (first + 0x10000) in
+    let utf16 = iconv_recode_string `Enc_ucs4 `Enc_utf16 ucs4 in
+
+    let utf8_net =
+      Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
+    let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
+    assert(utf8_net = utf8_iconv);
+
+    let back_net =
+      Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_net in
+    let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_net in
+    assert(back_net = back_iconv && back_net = utf16);
+
+    print_string "+";
+    flush stdout
+  done;
+  print_endline "OK";
+  flush stdout
+;;
+
+
+print_endline "Warning: You need the command 'iconv' to run this test!";
+flush stdout;
+
+(* 8 bit encodings that are compared with iconv. The tests for
+ * `Enc_iso88598, `Enc_iso885913, `Enc_iso885914 and `Enc_iso885915
+ * are disabled.
+ *)
+List.iter
+  test_iso_and_utf8
+  [ `Enc_iso88591;
+    `Enc_iso88592;
+    `Enc_iso88593;
+    `Enc_iso88594;
+    `Enc_iso88595;
+    `Enc_iso88596;
+    `Enc_iso88597;
+    `Enc_iso88599;
+    `Enc_iso885910;
+  ];
+
+test_utf16_and_utf8_0000_d7ff();
+test_utf16_and_utf8_e000_fffd()
+(* This test does not work because iconv does not support the surrogate
+ * representation of UTF-16:
+ * test_utf16_and_utf8_10000_10FFFF();
+ *)
+;;
diff --git a/helm/DEVEL/pxp/netstring/tools/Makefile b/helm/DEVEL/pxp/netstring/tools/Makefile
new file mode 100644
index 000000000..b3c148db7
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tools/Makefile
@@ -0,0 +1,10 @@
+# Build the tool in the subdirectory unimap_to_ocaml.
+all:
+ $(MAKE) -C unimap_to_ocaml
+
+# Non-recursive cleanup: this target has no commands, so nothing is
+# removed in this directory.
+clean:
+
+# Recursive cleanup: additionally runs CLEAN in unimap_to_ocaml.
+CLEAN: clean
+ $(MAKE) -C unimap_to_ocaml CLEAN
+
+# Additionally runs distclean in unimap_to_ocaml.
+distclean: clean
+ $(MAKE) -C unimap_to_ocaml distclean
diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore
new file mode 100644
index 000000000..c1fcbc4ae
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile
new file mode 100644
index 000000000..ed4277389
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile
@@ -0,0 +1,15 @@
+# Default target: build the executable unimap_to_ocaml.
+all: unimap_to_ocaml
+
+# Compile and link unimap_to_ocaml.ml with ocamlc (called through
+# ocamlfind), using the package str.
+unimap_to_ocaml: unimap_to_ocaml.ml
+ ocamlfind ocamlc -g -package str -linkpkg -custom \
+ -o unimap_to_ocaml \
+ unimap_to_ocaml.ml
+
+# Remove compiled object files and archives.
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+# There are no subdirectories, so CLEAN does the same as clean.
+CLEAN: clean
+
+# Additionally remove backup files and the executable.
+distclean: clean
+ rm -f *~ unimap_to_ocaml
+ rm -f *~ unimap_to_ocaml
+
diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml
new file mode 100644
index 000000000..14a89e9d9
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml
@@ -0,0 +1,201 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Printf;;
+
+(* Matches a comment: a '#' and everything after it up to the end of the
+ * line.
+ *)
+let comment_re = Str.regexp "#.*$";;
+(* Matches a non-empty run of spaces, tabs, carriage returns and
+ * newlines; used to split a line into words.
+ *)
+let space_re = Str.regexp "[ \t\r\n]+";;
+
+let read_unimap_format_a fname f =
+  (* Reads a Unicode mapping in format A from a "local" code to Unicode.
+   *
+   * fname: name of the file, only used in error messages
+   * f:     the input channel to read from, up to its end
+   *
+   * Returns the list of pairs (localcode, unicode) in the order of the
+   * lines. Comments are removed from every line; lines that contain no
+   * word after that are skipped. A line with exactly two words defines
+   * a pair, both words being converted with int_of_string. Any other
+   * number of words raises Failure.
+   *)
+
+  (* Parses one line: None for a line without words, Some pair for a
+   * mapping line.
+   *)
+  let parse_line line =
+    let stripped = Str.global_replace comment_re "" line in
+    match Str.split space_re stripped with
+      [] ->
+        None
+    | [ localcode; unicode ] ->
+        Some (int_of_string localcode, int_of_string unicode)
+    | _ ->
+        failwith
+          ("File " ^ fname ^ ": Do not know what to do with:\n" ^ stripped)
+  in
+
+  (* Collects the pairs in reverse order until the end of the channel. *)
+  let rec collect acc =
+    match (try Some (input_line f) with End_of_file -> None) with
+      None ->
+        List.rev acc
+    | Some line ->
+        (match parse_line line with
+           None -> collect acc
+         | Some pair -> collect (pair :: acc))
+  in
+
+  collect []
+;;
+
+
+(* One bucket of the mapping from Unicode to the local code, as built by
+ * from_unimap_as_string:
+ *   U_nil      the bucket is empty
+ *   U_single   the bucket contains exactly one pair (unicode, localcode)
+ *   U_list     the bucket contains several pairs (unicode, localcode)
+ *)
+type from_uni_list =
+ U_nil
+ | U_single of (int * int)
+ | U_list of (int * int) list
+
+type from_unicode =
+ from_uni_list array;;
+ (* A hashtable with fixed size (256). A pair (unicode, localcode) is
+ * stored at the position unicode mod 256 in the array.
+ *)
+
+
+let make_bijection unimap =
+  (* unimap: a list of pairs (localcode, unicode)
+   * returns a pair of arrays (to_uni, from_uni), both of length 256:
+   * - to_uni.(localcode) = Some unicode,
+   *     if the pair (localcode, unicode) exists
+   *   to_uni.(x) = None otherwise
+   * - from_uni.(unicode land 255) = [ ...; (unicode,localcode); ... ],
+   *     i.e. the pairs are distributed over the buckets by the low 8
+   *     bits of the Unicode code point; within a bucket they keep the
+   *     order of unimap
+   *
+   * Fails if a local code or a Unicode code point occurs twice.
+   *)
+
+  let to_uni = Array.create 256 None in
+  let from_uni = Array.create 256 [] in
+
+  let register (localcode, unicode) =
+    assert(localcode < 256);
+
+    (* Forward direction: local code -> Unicode *)
+    (match to_uni.(localcode) with
+       None ->
+         to_uni.(localcode) <- Some unicode
+     | Some _ ->
+         failwith ("Local code point " ^ string_of_int localcode ^
+                   " mapped twice"));
+
+    (* Backward direction: append to the bucket of the code point *)
+    let bucket = unicode land 255 in
+    let pairs = from_uni.(bucket) in
+    if List.mem_assoc unicode pairs then
+      failwith ("Unicode code point " ^ string_of_int unicode ^
+                " mapped twice");
+    from_uni.(bucket) <- pairs @ [unicode,localcode]
+  in
+
+  List.iter register unimap;
+  to_uni, from_uni
+;;
+
+
+let to_unimap_as_string to_unimap =
+  (* Marshals the table to_unimap as an int array in which None is
+   * represented by -1 and Some u by u.
+   *)
+  let ints =
+    Array.map
+      (function
+           None -> -1
+         | Some u -> u)
+      to_unimap
+  in
+  Marshal.to_string ints [ Marshal.No_sharing ]
+;;
+
+
+let from_unimap_as_string from_unimap =
+  (* Marshals the table from_unimap as an array of from_uni_list values:
+   * an empty bucket becomes U_nil, a bucket with one pair becomes
+   * U_single of that pair, and any longer bucket becomes U_list.
+   *)
+  let bucket_repr pairs =
+    match pairs with
+      [] -> U_nil
+    | [ pair ] -> U_single pair
+    | _ -> U_list pairs
+  in
+  Marshal.to_string
+    (Array.map bucket_repr from_unimap)
+    [ Marshal.No_sharing ]
+;;
+
+
+let print_bijection f name m_to_unicode m_from_unicode =
+  (* Prints on file f this O'Caml code:
+   *   let <name>_to_unicode = ...
+   *   let <name>_from_unicode = ...
+   * Both values are lazy expressions that unmarshal a string literal;
+   * the literals are the marshalled tables, escaped with String.escaped.
+   *)
+  let to_literal = String.escaped (to_unimap_as_string m_to_unicode) in
+  fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n"
+    name
+    to_literal;
+
+  let from_literal = String.escaped (from_unimap_as_string m_from_unicode) in
+  fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n "
+    name
+    from_literal
+;;
+
+
+let main() =
+  (* Command line driver:
+   *   unimap_to_ocaml [ -o outfile ] file.unimap ...
+   * Reads all given mapping files, computes the conversion tables for
+   * each of them, and writes the generated O'Caml code to the file given
+   * with -o, or to stdout if there is no -o option.
+   * The name of a mapping is the base name of its file without the
+   * suffix .unimap.
+   *)
+  let files = ref [] in
+
+  (* The output channel is lazy so that the output file is only opened
+   * (and truncated) after all input has been read and processed.
+   *)
+  let outch = ref (lazy stdout) in
+  Arg.parse
+    [ "-o", Arg.String (fun s -> outch := lazy (open_out s)),
+      " Write result to this file"]
+    (fun s -> files := !files @ [s])
+    "usage: unimap_to_ocaml file.unimap ...";
+
+  (* Matches the file name suffix .unimap. The backslash that escapes
+   * the dot for Str must itself be doubled inside the string literal.
+   *)
+  let suffix_re = Str.regexp "\\.unimap$" in
+
+  (* First read in all unimaps: *)
+  let unimaps =
+    List.map
+      (fun filename ->
+         let mapname =
+           Str.replace_first suffix_re "" (Filename.basename filename) in
+         let f = open_in filename in
+         prerr_endline ("Reading " ^ filename);
+         let unimap = read_unimap_format_a filename f in
+         close_in f;
+         mapname, unimap
+      )
+      !files
+  in
+
+  (* Second compute all bijections: *)
+  let bijections =
+    List.map
+      (fun (mapname, unimap) ->
+         prerr_endline ("Processing " ^ mapname);
+         let to_unicode, from_unicode = make_bijection unimap in
+         mapname, to_unicode, from_unicode
+      )
+      unimaps
+  in
+
+  let out = Lazy.force !outch in
+  (* Third output all results: *)
+  output_string out "(* WARNING! This is a generated file! *)\n";
+
+  (* The definitions of the tables ... *)
+  List.iter
+    (fun (mapname, to_unicode, from_unicode) ->
+       print_bijection out mapname to_unicode from_unicode)
+    bijections;
+
+  (* ... followed by one sequence of statements that registers them in
+   * the hashtables of Netmappings (in reverse order of the files).
+   *)
+  List.iter
+    (fun (mapname, _, _) ->
+       fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n"
+         mapname mapname;
+       fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n"
+         mapname mapname;
+    )
+    (List.rev bijections);
+  fprintf out "();;\n";
+
+  close_out out
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/29 00:48:52 gerd
+ * Conversion tables are now stored in marshalled form.
+ * New type for the conversion table Unicode to 8bit.
+ *
+ * Revision 1.2 2000/08/12 23:54:56 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/.cvsignore b/helm/DEVEL/pxp/pxp/.cvsignore
new file mode 100644
index 000000000..deb5b7fba
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/.cvsignore
@@ -0,0 +1,4 @@
+*.cmo
+*.cmx
+*.cmi
+
diff --git a/helm/DEVEL/pxp/pxp/LICENSE b/helm/DEVEL/pxp/pxp/LICENSE
new file mode 100644
index 000000000..55182a74d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/LICENSE
@@ -0,0 +1,22 @@
+Copyright 1999 by Gerd Stolpmann
+
+The package "markup" is copyright by Gerd Stolpmann.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "markup" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
diff --git a/helm/DEVEL/pxp/pxp/META b/helm/DEVEL/pxp/pxp/META
new file mode 100644
index 000000000..020128a0d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/META
@@ -0,0 +1,20 @@
+version = "1.0"
+requires = "netstring"
+description = "Validating parser for XML-1.0"
+archive(byte) = "pxp_types.cma
+ pxp_lex_iso88591.cma
+ pxp_lex_utf8.cma
+ pxp_engine.cma
+ pxp_utf8.cmo"
+archive(byte, pxp_without_utf8) = "pxp_types.cma
+ pxp_lex_iso88591.cma
+ pxp_engine.cma"
+archive(native) = "pxp_types.cmxa
+ pxp_lex_iso88591.cmxa
+ pxp_lex_utf8.cmxa
+ pxp_engine.cmxa
+ pxp_utf8.cmx"
+archive(native, pxp_without_utf8) = "pxp_types.cmxa
+ pxp_lex_iso88591.cmxa
+ pxp_engine.cmxa"
+
diff --git a/helm/DEVEL/pxp/pxp/Makefile b/helm/DEVEL/pxp/pxp/Makefile
new file mode 100644
index 000000000..f08eab99d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/Makefile
@@ -0,0 +1,105 @@
+# make all: make bytecode archive
+# make opt: make native archive
+# make install: install bytecode archive, and if present, native archive
+# make uninstall: uninstall package
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+# Bytecode build: first the parser generator and the ucs2_to_utf8 tool,
+# then the archives (Makefile.code), then the compatibility directory.
+.PHONY: all
+all:
+ $(MAKE) -C m2parsergen all
+ $(MAKE) -C tools/ucs2_to_utf8 all
+ $(MAKE) -f Makefile.code all
+ $(MAKE) -C compatibility all
+
+# Native build: same steps as "all", with the "opt" targets of
+# Makefile.code and of the compatibility directory.
+.PHONY: opt
+opt:
+ $(MAKE) -C m2parsergen all
+ $(MAKE) -C tools/ucs2_to_utf8 all
+ $(MAKE) -f Makefile.code opt
+ $(MAKE) -C compatibility opt
+
+# Install the package with ocamlfind. The list of compiled files is
+# produced by tools/collect_files (presumably it keeps only those of the
+# given names that exist -- see that script).
+.PHONY: install
+install: all tmp/pxp_entity.mli
+ files=`tools/collect_files *.cmi *.cma *.cmxa *.a \
+ pxp_utf8.cmo pxp_utf8.cmx pxp_utf8.o` && \
+ ocamlfind install $(NAME) $(MLI) tmp/pxp_entity.mli $$files META
+
+.PHONY: uninstall
+uninstall:
+ ocamlfind remove $(NAME)
+
+# Install/uninstall the package built in the compatibility directory.
+.PHONY: markup-install
+markup-install:
+ $(MAKE) -C compatibility install
+
+.PHONY: markup-uninstall
+markup-uninstall:
+ $(MAKE) -C compatibility uninstall
+
+# pxp_entity.ml has no hand-written interface. Generate one in tmp/ from
+# the signature printed by "ocamlc -i", preceded by a one-line comment.
+tmp/pxp_entity.mli: pxp_entity.ml
+ mkdir -p tmp
+ rm -f tmp/pxp_entity.*
+ cp pxp_entity.ml tmp
+ echo '(* Sorry, this is currently undocumented *)' >tmp/mli
+ ocamlc -i -c tmp/pxp_entity.ml >>tmp/mli
+ mv tmp/mli tmp/pxp_entity.mli
+
+# The files in lexers/ are touched before "make -C lexers clean" runs
+# (presumably because the Makefile there includes them and would fail if
+# they were missing -- TODO confirm).
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
+ rm -f pxp_yacc.ml
+ touch lexers/objects_iso88591 lexers/objects_utf8 lexers/depend
+ $(MAKE) -C lexers clean
+ $(MAKE) -C compatibility clean
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C doc CLEAN
+ $(MAKE) -C examples CLEAN
+ $(MAKE) -C rtests CLEAN
+ $(MAKE) -C m2parsergen CLEAN
+ touch tools/ucs2_to_utf8/depend
+ $(MAKE) -C tools/ucs2_to_utf8 clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+ $(MAKE) -C doc distclean
+ $(MAKE) -C examples distclean
+ $(MAKE) -C rtests distclean
+ $(MAKE) -C m2parsergen distclean
+ touch tools/ucs2_to_utf8/depend
+ $(MAKE) -C tools/ucs2_to_utf8 clean
+ $(MAKE) -C compatibility distclean
+
+# The file RELEASE contains the version string of META, with the
+# enclosing quotes stripped.
+RELEASE: META
+ awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
+
+# Create the archive $(NAME)-<version>.tar.gz in the parent directory,
+# leaving out CVS data, generated files and the listed private files.
+.PHONY: dist
+dist: RELEASE
+ r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*~" --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/oo_questions*" --exclude="*/testsamples*" --exclude="*/tmp/*" --exclude="*reptil*" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*.fig.bak" --exclude="*/ps/pic*" --exclude="*/examples/panel*" --exclude="*/examples/xmlforms_gtk*" --exclude="*/Mail*" $(NAME)/*
+
+# Tag the CVS module "markup" with $(NAME)-<version>; the dots of the
+# version are replaced by dashes.
+.PHONY: tag-release
+tag-release: RELEASE
+ r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r markup
+
+.PHONY: release
+release: distclean
+ $(MAKE) tag-release
+ $(MAKE) dist
+
+# Developer cycle: rebuild, reinstall (a failing uninstall is ignored),
+# and rebuild the validate example from scratch.
+.PHONY: dev
+dev:
+ $(MAKE) all
+ -$(MAKE) uninstall
+ $(MAKE) install
+ $(MAKE) -C examples/validate distclean
+ $(MAKE) -C examples/validate validate
diff --git a/helm/DEVEL/pxp/pxp/Makefile.code b/helm/DEVEL/pxp/pxp/Makefile.code
new file mode 100644
index 000000000..3afed39ca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/Makefile.code
@@ -0,0 +1,96 @@
+# make all: make bytecode archives
+# make opt: make native archives
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+all:
+ $(MAKE) -f Makefile.code pxp_types.cma
+ $(MAKE) -f Makefile.code pxp_lex_iso88591.cma
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cma; else rm -f pxp_lex_utf8.cma; fi
+ $(MAKE) -f Makefile.code pxp_engine.cma
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmo; else rm -f pxp_utf8.cmo; fi
+
+opt:
+ $(MAKE) -f Makefile.code pxp_types.cmxa
+ $(MAKE) -f Makefile.code pxp_lex_iso88591.cmxa
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cmxa; else rm -f pxp_lex_utf8.cmxa; fi
+ $(MAKE) -f Makefile.code pxp_engine.cmxa
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmx; else rm -f pxp_utf8.cmx; fi
+
+#----------------------------------------------------------------------
+
+pxp_types.cma: $(OBJECTS_types)
+ $(OCAMLC) -a -o pxp_types.cma $(OBJECTS_types)
+
+pxp_types.cmxa: $(XOBJECTS_types)
+ $(OCAMLOPT) -a -o pxp_types.cmxa $(XOBJECTS_types)
+
+pxp_engine.cma: $(OBJECTS_engine)
+ $(OCAMLC) -a -o pxp_engine.cma $(OBJECTS_engine)
+
+pxp_engine.cmxa: $(XOBJECTS_engine)
+ $(OCAMLOPT) -a -o pxp_engine.cmxa $(XOBJECTS_engine)
+
+
+# The following rules are "phony" to force 'make' to go into the
+# "lexers" subdirectory.
+
+.PHONY: pxp_lex_iso88591.cma
+pxp_lex_iso88591.cma: $(CMI_types)
+ $(MAKE) -C lexers all_iso88591
+ cp lexers/pxp_lex_iso88591.cma .
+
+.PHONY: pxp_lex_iso88591.cmxa
+pxp_lex_iso88591.cmxa: $(CMI_types)
+ $(MAKE) -C lexers opt_iso88591
+ cp lexers/pxp_lex_iso88591.cmxa lexers/pxp_lex_iso88591.a .
+
+.PHONY: pxp_lex_utf8.cma
+pxp_lex_utf8.cma: $(CMI_types)
+ $(MAKE) -C lexers all_utf8
+ cp lexers/pxp_lex_utf8.cma .
+
+.PHONY: pxp_lex_utf8.cmxa
+pxp_lex_utf8.cmxa: $(CMI_types)
+ $(MAKE) -C lexers opt_utf8
+ cp lexers/pxp_lex_utf8.cmxa lexers/pxp_lex_utf8.a .
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = $(OCAMLFIND) ocamlc -package "$(PACKAGES)" \
+ -g -I lexers $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = $(OCAMLFIND) ocamlopt -package "$(PACKAGES)" \
+ -p -I lexers $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli pxp_yacc.ml
+ $(OCAMLDEP) *.ml *.mli >depend
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .m2y
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+.m2y.ml:
+ ./m2parsergen/m2parsergen < $< >`basename $< .m2y`.ml || { rm -f `basename $< .m2y`.ml; false; }
+
+*.mli:
+
+
+# Generated dependencies:
+
+include depend
+
diff --git a/helm/DEVEL/pxp/pxp/Makefile.conf b/helm/DEVEL/pxp/pxp/Makefile.conf
new file mode 100644
index 000000000..749c702c7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/Makefile.conf
@@ -0,0 +1,37 @@
+# User-configurable section:
+
+# yes or no: Should the parser support UTF-8 strings as internal
+# representation? "yes" is recommended, but the parser
+# becomes much bigger.
+UTF8_SUPPORT = yes
+
+# --- End of User-configurable section.
+
+# Settings.
+
+NAME = pxp
+PACKAGES = netstring
+
+# Caml objects that are needed by the lexers:
+OBJECTS_types = \
+ pxp_types.cmo pxp_lexer_types.cmo
+
+CMI_types = $(OBJECTS_types:.cmo=.cmi)
+
+# Caml objects that depend on the lexers:
+OBJECTS_engine = \
+ pxp_lexers.cmo \
+ pxp_dfa.cmo \
+ pxp_aux.cmo pxp_reader.cmo \
+ pxp_entity.cmo pxp_dtd.cmo pxp_document.cmo \
+ pxp_yacc.cmo pxp_codewriter.cmo
+
+# Same as native objects:
+XOBJECTS_types = $(OBJECTS_types:.cmo=.cmx)
+XOBJECTS_engine = $(OBJECTS_engine:.cmo=.cmx)
+
+# .mli files to install:
+
+MLI = pxp_document.mli pxp_dtd.mli \
+ pxp_types.mli pxp_yacc.mli \
+ pxp_codewriter.mli pxp_dfa.mli
diff --git a/helm/DEVEL/pxp/pxp/RELEASE b/helm/DEVEL/pxp/pxp/RELEASE
new file mode 100644
index 000000000..d3827e75a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/RELEASE
@@ -0,0 +1 @@
+1.0
diff --git a/helm/DEVEL/pxp/pxp/compatibility/.cvsignore b/helm/DEVEL/pxp/pxp/compatibility/.cvsignore
new file mode 100644
index 000000000..deb5b7fba
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/.cvsignore
@@ -0,0 +1,4 @@
+*.cmo
+*.cmx
+*.cmi
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/META b/helm/DEVEL/pxp/pxp/compatibility/META
new file mode 100644
index 000000000..441e30a0f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/META
@@ -0,0 +1,6 @@
+version = "PXP-emulator"
+requires = "pxp"
+description = "Validating parser for XML-1.0"
+archive(byte) = "markup.cma"
+archive(native) = "markup.cmxa"
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile b/helm/DEVEL/pxp/pxp/compatibility/Makefile
new file mode 100644
index 000000000..187116ccb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/Makefile
@@ -0,0 +1,40 @@
+# make all: make bytecode archive
+# make opt: make native archive
+# make install: install bytecode archive, and if present, native archive
+# make uninstall: uninstall package
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all:
+ $(MAKE) -f Makefile.code all
+
+.PHONY: opt
+opt:
+ $(MAKE) -f Makefile.code opt
+
+.PHONY: install
+install: all
+ files=`../tools/collect_files *.cmi *.cma *.cmxa *.a` && \
+ ocamlfind install $(NAME) $(MLI) $$files META
+
+.PHONY: uninstall
+uninstall:
+ ocamlfind remove $(NAME)
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile.code b/helm/DEVEL/pxp/pxp/compatibility/Makefile.code
new file mode 100644
index 000000000..2733faa09
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/Makefile.code
@@ -0,0 +1,50 @@
+# make all: make bytecode archives
+# make opt: make native archives
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all: markup.cma
+
+.PHONY: opt
+opt: markup.cmxa
+
+#----------------------------------------------------------------------
+
+markup.cma: $(OBJECTS)
+ $(OCAMLC) -a -o markup.cma $(OBJECTS)
+
+markup.cmxa: $(XOBJECTS)
+ $(OCAMLOPT) -a -o markup.cmxa $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules ('=' variables expand lazily, so OCAMLFIND may be set below its uses):
+
+OPTIONS =
+OCAMLC = $(OCAMLFIND) ocamlc -g -I .. -package netstring $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = $(OCAMLFIND) ocamlopt -p -I .. -package netstring $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+*.mli:
+
+
+# Generated dependencies:
+
+include depend
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf b/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf
new file mode 100644
index 000000000..061d0cae1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf
@@ -0,0 +1,9 @@
+NAME = markup
+
+OBJECTS = markup_types.cmo markup_dtd.cmo markup_reader.cmo \
+ markup_document.cmo markup_yacc.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+
+MLI = markup_document.mli markup_dtd.mli \
+ markup_types.mli markup_yacc.mli markup_reader.mli
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/README b/helm/DEVEL/pxp/pxp/compatibility/README
new file mode 100644
index 000000000..50086732a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/README
@@ -0,0 +1,21 @@
+This directory contains the modules for Markup-0.2.10
+compatibility. The modules consist mainly of wrapper classes for the
+new PXP classes, and translate the old methods to the new ones.
+
+Please note that the compatibility is not perfect. Sometimes there are
+new methods which do not exist in Markup-0.2.10, and sometimes even
+existing methods changed their signature. I have tried to avoid that,
+but there are some ugly cases which are hard to solve without such
+modifications.
+
+Translating old methods into new methods costs time and
+memory. Because of this, it is best to consider the compatibility
+modules as a migration path to PXP: You can test whether PXP parses your
+input files, and you can compare the old API with the new API
+directly. (However, it is hard to test new features of PXP with the
+compatibility modules; the old API does not reflect the new features.)
+
+The compatibility modules are currently maintained, but that will stop
+once PXP has been established.
+
+(Gerd)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml
new file mode 100644
index 000000000..bbc497953
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml
@@ -0,0 +1,374 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+type node_type =
+ T_element of string
+ | T_data
+
+class type [ 'node ] extension = [ 'node ] Pxp_document.extension
+
+class type [ 'ext, 'node ] pxp_extension_type =
+object ('self)
+ method clone : 'self
+ method node : 'self Pxp_document.node
+ method set_node : 'self Pxp_document.node -> unit
+
+ method markup_node : 'node
+ method set_markup_node : 'node -> unit
+
+ method set_index : 'self Pxp_yacc.index -> unit
+ method index : 'self Pxp_yacc.index
+ end
+;;
+
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+ method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
+
+ method extension : 'ext
+ method delete : unit
+ method parent : 'ext node
+ method root : 'ext node
+ method orphaned_clone : 'ext node
+ method orphaned_flat_clone : 'ext node
+ method add_node : 'ext node -> unit
+ method add_pinstr : Markup_dtd.proc_instruction -> unit
+ method pinstr : string -> Markup_dtd.proc_instruction list
+ method pinstr_names : string list
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node -> unit) -> unit
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method set_nodes : 'ext node list -> unit
+ method data : string
+ method node_type : node_type
+ method attribute : string -> Markup_types.att_value
+ method attribute_names : string list
+ method attribute_type : string -> Markup_types.att_type
+ method attributes : (string * Markup_types.att_value) list
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ method quick_set_attributes : (string * Markup_types.att_value) list -> unit
+ method find : string -> 'ext node
+ method reset_finder : unit
+ method dtd : Markup_dtd.dtd
+ method create_element :
+ Markup_dtd.dtd -> node_type -> (string * string) list -> 'ext node
+ method create_data : Markup_dtd.dtd -> string -> 'ext node
+ method local_validate : unit
+ method keep_always_whitespace_mode : unit
+ method write_compact_as_latin1 : Markup_types.output_stream -> unit
+ method internal_adopt : 'ext node option -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : Markup_dtd.dtd -> string -> (string * string) list -> unit
+ end
+;;
+
+
+class [ 'ext ] pxp_extension init_markup_node =
+ (object (self : 'self)
+ (* constraint 'ext = 'ext node #extension *)
+ val mutable pxp_node = (None :
+ 'self Pxp_document.node option)
+ (* 'ext pxp_extension Pxp_document.node option *)
+ val mutable markup_node = (init_markup_node : 'ext node)
+
+ val mutable index = (None : 'self Pxp_yacc.index option)
+
+ method clone =
+ {< >}
+
+ method node =
+ match pxp_node with
+ None ->
+ assert false
+ | Some n -> n
+
+ method set_node n =
+ pxp_node <- Some n
+
+ method markup_node = markup_node
+
+ method set_markup_node n = markup_node <- n
+
+ method set_index ix =
+ index <- Some ix
+
+ method index =
+ match index with
+ None -> assert false
+ | Some x -> x
+
+ end
+ : ['ext, 'ext node] pxp_extension_type )
+;;
+
+
+class [ 'ext ] emulate_markup_node init_ext init_pxp_node =
+ object (self)
+ constraint 'ext = 'ext node #extension
+ val mutable pxp_node = (init_pxp_node :
+ ('ext, 'ext #node)
+ pxp_extension_type Pxp_document.node option)
+ val mutable extension = (init_ext : 'ext)
+
+ method pxp_node =
+ match pxp_node with
+ None -> assert false
+ | Some n -> n
+
+ method extension = extension
+ method delete = self # pxp_node # delete
+ method parent = self # pxp_node # parent # extension # markup_node
+ method root = self # pxp_node # root # extension # markup_node
+
+ method orphaned_clone =
+ let ext' = extension # clone in
+ let pxp' = self # pxp_node # orphaned_clone in
+ let n = new emulate_markup_node ext' (Some pxp') in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ pxp' # extension # set_markup_node n;
+ n
+
+ method orphaned_flat_clone =
+ let ext' = extension # clone in
+ let pxp' = self # pxp_node # orphaned_flat_clone in
+ let n = new emulate_markup_node ext' (Some pxp') in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ pxp' # extension # set_markup_node n;
+ n
+
+ method dtd = self # pxp_node # dtd
+
+ method add_node (n : 'ext node) =
+ let n_pxp = n # pxp_node in
+ self # pxp_node # add_node n_pxp
+
+ method add_pinstr pi =
+ self # pxp_node # add_pinstr pi
+
+ method sub_nodes =
+ let l = self # pxp_node # sub_nodes in
+ List.map (fun n_pxp -> n_pxp # extension # markup_node) l
+
+ method pinstr name =
+ self # pxp_node # pinstr name
+
+ method pinstr_names =
+ self # pxp_node # pinstr_names
+
+ method iter_nodes f =
+ self # pxp_node # iter_nodes
+ (fun n_pxp -> f (n_pxp # extension # markup_node))
+
+ method iter_nodes_sibl f =
+ self # pxp_node # iter_nodes_sibl
+ (fun left_pxp node_pxp right_pxp ->
+ let left =
+ match left_pxp with
+ None -> None
+ | Some n_pxp -> Some (n_pxp # extension # markup_node) in
+ let right =
+ match right_pxp with
+ None -> None
+ | Some n_pxp -> Some (n_pxp # extension # markup_node) in
+ let node =
+ node_pxp # extension # markup_node in
+ f left node right
+ )
+
+ method set_nodes (l : 'ext node list) =
+ let l_pxp = List.map (fun n -> n # pxp_node) l in
+ self # pxp_node # set_nodes l_pxp
+
+ method data = self # pxp_node # data
+
+ method node_type = (* map the PXP node type onto the two-case node_type of this module *)
+ match self # pxp_node # node_type with
+ Pxp_document.T_data -> T_data
+ | Pxp_document.T_element name -> T_element name
+ | Pxp_document.T_super_root -> T_element "-vr" (* super root is reported as pseudo element "-vr" *)
+ | Pxp_document.T_pinstr _ -> T_element "-pi" (* processing instruction node is reported as pseudo element "-pi" *)
+ | _ -> assert false (* any other PXP node type is not translated and fails the assertion *)
+
+ method attribute name =
+ self # pxp_node # attribute name
+
+ method attribute_names =
+ self # pxp_node # attribute_names
+
+ method attribute_type name =
+ self # pxp_node # attribute_type name
+
+ method attributes =
+ self # pxp_node # attributes
+
+ method required_string_attribute name =
+ self # pxp_node # required_string_attribute name
+
+ method required_list_attribute name =
+ self # pxp_node # required_list_attribute name
+
+ method optional_string_attribute name =
+ self # pxp_node # optional_string_attribute name
+
+ method optional_list_attribute name =
+ self # pxp_node # optional_list_attribute name
+
+ method quick_set_attributes l =
+ self # pxp_node # quick_set_attributes l
+
+ method find (name : string) =
+ let index = self # root # pxp_node # extension # index in
+ let n = index # find name in (* may raise Not_found *)
+ n # extension # markup_node
+
+ method reset_finder = () (* no-op in the emulation; "find" consults the index stored in the root's PXP extension *)
+
+ method create_element dtd nt atts =
+ let nt_pxp =
+ match nt with
+ T_data -> Pxp_document.T_data
+ | T_element name -> Pxp_document.T_element name in
+ let node_pxp =
+ self # pxp_node # create_element dtd nt_pxp atts in
+ let ext' = extension # clone in
+ let n = new emulate_markup_node ext' (Some node_pxp) in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ node_pxp # extension # set_markup_node n;
+ n
+
+ method create_data dtd s =
+ let node_pxp =
+ self # pxp_node # create_data dtd s in
+ let ext' = extension # clone in
+ let n = new emulate_markup_node ext' (Some node_pxp) in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ node_pxp # extension # set_markup_node n;
+ n
+
+ method keep_always_whitespace_mode =
+ self # pxp_node # keep_always_whitespace_mode
+
+ method write_compact_as_latin1 out =
+ self # pxp_node # write_compact_as_latin1 out
+
+ method local_validate =
+ self # pxp_node # local_validate()
+
+ method internal_adopt (_ : 'ext node option) : unit =
+ assert false
+ (* Not emulated: the argument is ignored and every call fails the assertion. *)
+
+ method internal_delete (_ : 'ext node) : unit =
+ assert false
+ (* Not emulated: the argument is ignored and every call fails the assertion. *)
+
+ method internal_init (_ : Markup_dtd.dtd) (_ : string) (_ : (string * string) list) : unit =
+ assert false
+ (* Not emulated: the arguments are ignored and every call fails the assertion. *)
+ end
+;;
+
+class [ 'ext ] data_impl ext data =
+ object (self)
+ inherit [ 'ext ] emulate_markup_node ext None
+ constraint 'ext = 'ext node #extension
+ initializer
+ if data <> "" then
+ failwith "Emulation of Markup_document: Cannot instantiate data node with non-empty string";
+ let self' = (self : 'ext #node :> 'ext node ) in
+ pxp_node <- Some (new Pxp_document.data_impl (new pxp_extension self'))
+
+ end
+;;
+
+class [ 'ext ] element_impl ext =
+ object (self)
+ inherit [ 'ext ] emulate_markup_node ext None
+ initializer
+ let self' = (self : 'ext #node :> 'ext node ) in
+ pxp_node <- Some (new Pxp_document.element_impl (new pxp_extension self'))
+ end
+;;
+
+
+class [ 'ext ] document w =
+ object (self)
+ val pxp_doc = new Pxp_document.document
+ (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
+
+ val mutable standalone_flag = false
+
+ method init_xml_version v =
+ pxp_doc # init_xml_version v
+
+ method xml_version =
+ pxp_doc # xml_version
+
+ method init_xml_standalone b =
+ standalone_flag <- b
+
+ method xml_standalone = standalone_flag
+
+ method init_root (r : 'ext node) =
+ pxp_doc # init_root (r # pxp_node);
+ self # dtd # set_standalone_declaration standalone_flag
+ (* questionable -- NOTE(review): the flag is copied to the DTD only here, so init_xml_standalone called after init_root does not reach the DTD *)
+
+ method root =
+ let pxp_root = pxp_doc # root in
+ pxp_root # extension # markup_node
+
+ method dtd =
+ pxp_doc # dtd
+
+ method add_pinstr pi =
+ pxp_doc # add_pinstr pi
+
+ method pinstr name =
+ pxp_doc # pinstr name
+
+ method pinstr_names =
+ pxp_doc # pinstr_names
+
+ method write_compact_as_latin1 out =
+ pxp_doc # write_compact_as_latin1 out
+
+ end
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/08/18 20:19:00 gerd
+ * Changed the emulation: there are now wrapper objects for nodes.
+ * This was necessary because node_type changed in PXP such that it became
+ * incompatible with Markup's node_type.
+ *
+ * Revision 1.5 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.4 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.3 2000/06/14 22:19:27 gerd
+ * Update because of additional 'encoding' methods.
+ *
+ * Revision 1.2 2000/05/30 00:08:40 gerd
+ * Bugfix.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli
new file mode 100644
index 000000000..2e37f0f22
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli
@@ -0,0 +1,420 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_document.mli.
+ * It corresponds to revision 1.13 of markup_document.mli.
+ *)
+
+(**********************************************************************)
+(* *)
+(* Markup_document: *)
+(* Object model of the document/element instances *)
+(* *)
+(**********************************************************************)
+
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class type node ............. The common class type of the nodes of
+ * the element tree. Nodes are either
+ * elements (inner nodes) or data nodes
+ * (leaves)
+ * class type extension ........ The minimal properties of the so-called
+ * extensions of the nodes: Nodes can be
+ * customized by applying a class parameter
+ * that adds methods/values to nodes.
+ * class data_impl : node ...... Implements data nodes.
+ * class element_impl : node ... Implements element nodes
+ * class document .............. A document is an element with some additional
+ * properties
+ *
+ * ======================================================================
+ *
+ * THE STRUCTURE OF NODE TREES:
+ *
+ * Every node except the root node has a parent node. The parent node is
+ * always an element, because data nodes never contain other nodes.
+ * In the other direction, element nodes may have children; both elements
+ * and data nodes are possible as children.
+ * Every node knows its parent (if any) and all its children (if any);
+ * the linkage is maintained in both directions. A node without a parent
+ * is called a root.
+ * It is not possible that a node is the child of two nodes (two different nodes
+ * or a multiple child of the same node).
+ * You can break the connection between a node and its parent; the method
+ * "delete" performs this operation and deletes the node from the parent's
+ * list of children. The node is now a root, for itself and for all
+ * subordinate nodes. In this context, the node is also called an orphan,
+ * because it has lost its parent (this is a bit misleading because the
+ * parent is not always the creator of a node).
+ * In order to simplify complex operations, you can also set the list of
+ * children of an element. Nodes that have been children before are unchanged;
+ * new nodes are added (and the linkage is set up), nodes no more occurring
+ * in the list are handled as if they had been deleted.
+ * If you try to add a node that is not a root (either by an "add" or by a
+ * "set" operation) the operation fails.
+ *
+ * CREATION OF NODES
+ *
+ * The class interface supports creation of nodes by cloning a so-called
+ * exemplar. The idea is that it is sometimes useful to implement different
+ * element types by different classes, and to implement this by looking up
+ * exemplars.
+ * Imagine you have three element types A, B, and C, and three classes
+ * a, b, and c implementing the node interface (for example, by providing
+ * different extensions, see below). The XML parser can be configured to
+ * have a lookup table
+ * { A --> a0, B --> b0, C --> c0 }
+ * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
+ * objects belonging to these classes. If the parser finds an instance of
+ * A, it looks up the exemplar a0 of A and clones it (actually, the method
+ * "create_element" performs this for elements, and "create_data" for data
+ * nodes). Clones belong to the same class as the original nodes, so the
+ * instances of the elements have the same classes as the configured
+ * exemplars.
+ * Note: This technique assumes that the interface of all exemplars is the
+ * same!
+ *
+ * THE EXTENSION
+ *
+ * The class type node and all its implementations have a class parameter
+ * 'ext which must at least fulfil the properties of the class type "extension".
+ * The idea is that you can add properties, for example:
+ *
+ * class my_extension =
+ * object
+ * (* minimal properties required by class type "extension": *)
+ * method clone = ...
+ * method node = ...
+ * method set_node n = ...
+ * (* here my own methods: *)
+ * method do_this_and_that ...
+ * end
+ *
+ * class my_element_impl = [ my_extension ] element_impl
+ * class my_data_impl = [ my_extension ] data_impl
+ *
+ * The whole XML parser is parameterized with 'ext, so your extension is
+ * visible everywhere (this is the reason why extensibility is solved by
+ * parametric polymorphism and not by inclusive polymorphism (subtyping)).
+ *
+ *
+ * SOME COMPLICATED TYPE EXPRESSIONS
+ *
+ * Sometimes the following type expressions turn out to be necessary:
+ *
+ * 'a node extension as 'a
+ * This is the type of an extension that belongs to a node that
+ * has an extension that is the same as we started with.
+ *
+ * 'a extension node as 'a
+ * This is the type of a node that has an extension that belongs to a
+ * node of the type we started with.
+ *
+ *
+ * DOCUMENTS
+ * ...
+ *
+ * ======================================================================
+ *
+ * SIMPLE USAGE: ...
+ *)
+
+
+open Markup_dtd
+
+
+type node_type =
+ T_element of string
+ | T_data
+
+
+
+class type [ 'node ] extension =
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what previously has been set by "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+;;
+
+class type [ 'ext, 'node ] pxp_extension_type =
+object ('self)
+ method clone : 'self
+ method node : 'self Pxp_document.node
+ method set_node : 'self Pxp_document.node -> unit
+
+ method markup_node : 'node
+ method set_markup_node : 'node -> unit
+
+ method set_index : 'self Pxp_yacc.index -> unit
+ method index : 'self Pxp_yacc.index
+ end
+;;
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+ method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
+
+ method extension : 'ext
+ (* Return the extension of this node: *)
+
+ method delete : unit
+ (* Delete this node from the parent's list of sub nodes. This node gets
+ * orphaned.
+ * 'delete' does nothing if this node does not have a parent.
+ *)
+
+ method parent : 'ext node
+ (* Get the parent, or raise Not_found if this node is an orphan. *)
+
+ method root : 'ext node
+ (* Get the direct or indirect parent that does not have a parent itself,
+ * i.e. the root of the tree.
+ *)
+
+ method orphaned_clone : 'ext node
+ (* return an exact clone of this element and all sub nodes (deep copy)
+ * except string values which are shared by this node and the clone.
+ * The other exception is that the clone has no parent (i.e. it is now
+ * a root).
+ *)
+
+ method orphaned_flat_clone : 'ext node
+ (* return a clone of this element where all subnodes are omitted.
+ * The type of the node, and the attributes are the same as in the
+ * original node.
+ * The clone has no parent.
+ *)
+
+ method add_node : 'ext node -> unit
+ (* Append new sub nodes -- mainly used by the parser itself, but
+ * of course open for everybody. If an element is added, it must be
+ * an orphan (i.e. does not have a parent node); and after addition
+ * *this* node is the new parent.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* Add a processing instruction to the set of processing instructions of
+ * this node. Usually only elements contain processing instructions.
+ *)
+
+ method pinstr : string -> proc_instruction list
+ (* Get all processing instructions with the passed name *)
+
+ method pinstr_names : string list
+ (* Get a list of all names of processing instructions *)
+
+ method sub_nodes : 'ext node list
+ (* Get the list of sub nodes *)
+
+ method iter_nodes : ('ext node -> unit) -> unit
+ (* iterate over the sub nodes *)
+
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ (* Here every iteration step can also access the previous and the
+ * following node, if present.
+ *)
+
+ method find : string -> 'ext node
+ (* Get the node that has an ID attribute with this value, or raise
+ * Not_found.
+ * "find" may also cause a Validation_error if something is wrong
+ * with the IDs.
+ *)
+
+ method reset_finder : unit
+ (* Ensures that newly added nodes will also be found. *)
+
+ method set_nodes : 'ext node list -> unit
+ (* Set the list of sub nodes. Elements that are no longer sub nodes get
+ * orphaned, and all new elements that previously were not sub nodes
+ * must have been orphaned.
+ *)
+
+ method data : string
+ (* Get the data string of this node. For data nodes, this string is just
+ * the content. For elements, this string is the concatenation of all
+ * subordinate data nodes.
+ *)
+
+ method node_type : node_type
+ (* Get the name of the element type. *)
+
+ method attribute : string -> Markup_types.att_value
+ method attribute_names : string list
+ method attribute_type : string -> Markup_types.att_type
+ method attributes : (string * Markup_types.att_value) list
+ (* Get a specific attribute; get the names of all attributes; get the
+ * type of a specific attribute; get names and values of all attributes.
+ * Only elements have attributes.
+ * Note: If the DTD allows arbitrary attributes for this element, "attribute_type"
+ * raises Undeclared.
+ *)
+
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ (* Return the attribute or fail if the attribute is not present:
+ * The first version passes the value always as string back;
+ * the second version always as list.
+ *)
+
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ (* Return some attribute value or return None if the attribute is not
+ * present:
+ * The first version passes the value always as string back;
+ * the second version always as list.
+ *)
+
+ method quick_set_attributes : (string * Markup_types.att_value) list -> unit
+ (* Sets the attributes but does not check whether they match the DTD.
+ *)
+
+ method dtd : dtd
+ (* Get the DTD *)
+
+ method create_element : dtd -> node_type -> (string * string) list -> 'ext node
+ (* create an "empty copy" of this element:
+ * - new DTD
+ * - new node type
+ * - new attribute list
+ * - empty list of nodes
+ *)
+
+ method create_data : dtd -> string -> 'ext node
+ (* create an "empty copy" of this data node: *)
+
+ method local_validate : unit
+ (* Check that this element conforms to the DTD: *)
+
+ method keep_always_whitespace_mode : unit
+ (* Normally, add_node does not accept data nodes when the DTD does not
+ * allow data nodes or only whitespace ("ignorable whitespace").
+ * Once you have invoked this method, ignorable whitespace is forced
+ * to be included into the document.
+ *)
+
+ method write_compact_as_latin1 : Markup_types.output_stream -> unit
+ (* Write the contents of this node and the subtrees to the passed
+ * output stream; the character set ISO-8859-1 is used. The format
+ * is compact (the opposite of "pretty printing").
+ *)
+
+ (* ---------------------------------------- *)
+ (* internal methods: *)
+ method internal_adopt : 'ext node option -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : dtd -> string -> (string * string) list -> unit
+ end
+;;
+
+class [ 'ext ] data_impl : 'ext -> string -> [ 'ext ] node
+
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+
+class [ 'ext ] document :
+ Markup_types.collect_warnings ->
+ object
+ method init_xml_version : string -> unit
+ method init_xml_standalone : bool -> unit
+ method init_root : 'ext node -> unit
+
+ method xml_version : string
+ method xml_standalone : bool
+ method dtd : dtd
+ method root : 'ext node
+
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+
+ method write_compact_as_latin1 : Markup_types.output_stream -> unit
+ (* Write the document to the passed
+ * output stream; the character set ISO-8859-1 is used. The format
+ * is compact (the opposite of "pretty printing").
+ * If a DTD is present, the DTD is included into the internal subset.
+ *)
+
+ end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:16 gerd
+ * Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.3 2000/07/16 16:35:06 gerd
+ * Update because PXP interface contains now the method 'write'.
+ *
+ * Revision 1.2 2000/06/14 22:19:27 gerd
+ * Update because of additional 'encoding' methods.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.13 2000/05/27 19:15:08 gerd
+ * Removed the method init_xml_standalone.
+ *
+ * Revision 1.12 2000/05/01 20:42:34 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.11 2000/04/30 18:15:57 gerd
+ * Beautifications.
+ * New method keep_always_whitespace_mode.
+ *
+ * Revision 1.10 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.9 2000/01/27 21:51:56 gerd
+ * Added method 'attributes'.
+ *
+ * Revision 1.8 2000/01/27 21:19:07 gerd
+ * Added further methods.
+ *
+ * Revision 1.7 1999/11/09 22:20:14 gerd
+ * Removed method init_dtd from class "document". The DTD is
+ * implicitly passed to the document by the root element.
+ *
+ * Revision 1.6 1999/09/01 22:51:40 gerd
+ * Added methods to store processing instructions.
+ *
+ * Revision 1.5 1999/09/01 16:19:57 gerd
+ * The "document" class has now a "warner" as class argument.
+ *
+ * Revision 1.4 1999/08/19 21:59:13 gerd
+ * Added method "reset_finder".
+ *
+ * Revision 1.3 1999/08/19 01:08:29 gerd
+ * Added method "find".
+ *
+ * Revision 1.2 1999/08/15 02:19:41 gerd
+ * Some new explanations: That unknown elements are not rejected
+ * if the DTD allows them.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml
new file mode 100644
index 000000000..7df5e29c6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml
@@ -0,0 +1,36 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+(* Markup-style DTD: a Pxp_dtd.dtd built with ISO-8859-1 as encoding. *)
+class dtd (w : Markup_types.collect_warnings) =
+  Pxp_dtd.dtd (w :> Pxp_types.collect_warnings) `Enc_iso88591
+;;
+
+class dtd_element the_dtd elname = Pxp_dtd.dtd_element the_dtd elname;;
+
+(* The PXP notation and PI classes take an encoding as additional last
+ * argument; this compatibility layer always passes ISO-8859-1. *)
+class dtd_notation n xid = Pxp_dtd.dtd_notation n xid `Enc_iso88591;;
+
+class proc_instruction pi_target pi_value =
+  Pxp_dtd.proc_instruction pi_target pi_value `Enc_iso88591;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/06/14 22:19:27 gerd
+ * Update because of additional 'encoding' methods.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli
new file mode 100644
index 000000000..660b35ae8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli
@@ -0,0 +1,108 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_dtd.mli.
+ * It corresponds to revision 1.11 of markup_dtd.mli.
+ *)
+
+(**********************************************************************)
+(* *)
+(* Markup_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+ Markup_types.collect_warnings ->
+ Pxp_dtd.dtd
+ (* The argument is the warner; the object itself is a Pxp_dtd.dtd.
+ * Incompatibilities (methods differing from markup-0.2.10): add_gen_entity, gen_entity
+ *)
+
+class dtd_element : dtd -> string -> Pxp_dtd.dtd_element
+ (* Arguments: the DTD and the name of the element.
+ * Incompatibilities (methods differing from markup-0.2.10): set_content_model, add_attribute
+ *)
+
+class dtd_notation : string -> Markup_types.ext_id -> Pxp_dtd.dtd_notation
+ (* Arguments above: notation name, external identifier. Below: target, value. *)
+class proc_instruction : string -> string -> Pxp_dtd.proc_instruction
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.11 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.10 2000/05/27 19:20:38 gerd
+ * Changed the interfaces for the standalone check: New
+ * methods: standalone_declaration, set_standalone_declaration,
+ * externally_declared, attribute_violates_standalone_declaration.
+ * The method set_content_model has been renamed to
+ * set_cm_and_extdecl; it now initializes also whether the element
+ * has been declared in an external entity.
+ * Methods add_gen_entity and gen_entity pass an additional
+ * boolean argument containing whether the declaration of the
+ * general entity happened in an external entity.
+ * Method add_attribute expects this argument, too, which
+ * states whether the declaration of the attribute happened in an
+ * external entity.
+ *
+ * Revision 1.9 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.8 2000/05/06 23:10:26 gerd
+ * allow_arbitrary for elements, too.
+ *
+ * Revision 1.7 2000/05/01 20:42:52 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.6 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.5 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.4 1999/11/09 22:15:41 gerd
+ * Added method "arbitrary_allowed".
+ *
+ * Revision 1.3 1999/09/01 16:21:56 gerd
+ * "dtd" classes have now an argument that passes a "warner".
+ *
+ * Revision 1.2 1999/08/15 02:20:23 gerd
+ * New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml
new file mode 100644
index 000000000..a196c2219
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml
@@ -0,0 +1,119 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+open Markup_types;;
+
+class type resolver = (* the markup-0.2.10 style resolver interface *)
+ object
+ method open_in : ext_id -> Lexing.lexbuf (* opens the source named by the ext_id *)
+ method close_in : unit (* implemented below by calling close_all of PXP *)
+ method change_encoding : string -> unit (* argument: the encoding as string *)
+ method clone : resolver (* returns a new resolver object *)
+ end
+;;
+
+(* General note: close_in is simulated by close_all. Of course, this is
+ * wrong, but it should not matter
+ *)
+
+
+(* Resolver reading from the already opened channel [ch]. It is a thin
+ * wrapper around Pxp_reader.resolve_read_this_channel, which is created
+ * with ~auto_close:false.
+ *)
+class resolve_read_channel ch the_warner =
+  object
+    val warner = the_warner
+    val pxp_resolver =
+      new Pxp_reader.resolve_read_this_channel ~auto_close:false ch
+
+    initializer
+      let pxp_warner =
+        (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
+      in
+      pxp_resolver # init_warner pxp_warner;
+      pxp_resolver # init_rep_encoding `Enc_iso88591
+
+    (* All methods delegate to the wrapped PXP resolver. *)
+    method clone =
+      ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
+
+    method change_encoding enc = pxp_resolver # change_encoding enc
+    method open_in xid = pxp_resolver # open_in xid
+
+    method close_in = pxp_resolver # close_all   (* sic! *)
+  end
+;;
+
+
+(* Resolver reading the document from the string [str]; it wraps
+ * Pxp_reader.resolve_read_this_string. There is no warner argument:
+ * a Pxp_types.drop_warnings object is installed instead.
+ *)
+class resolve_read_string str =
+  object
+    val pxp_resolver = new Pxp_reader.resolve_read_this_string str
+    val warner = new Pxp_types.drop_warnings
+
+    initializer
+      pxp_resolver # init_warner warner;
+      pxp_resolver # init_rep_encoding `Enc_iso88591
+
+    (* All methods delegate to the wrapped PXP resolver. *)
+    method clone =
+      ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
+
+    method change_encoding enc = pxp_resolver # change_encoding enc
+    method open_in xid = pxp_resolver # open_in xid
+
+    method close_in = pxp_resolver # close_all   (* sic! *)
+  end
+;;
+
+
+(* Resolver reading from the local file system; it wraps
+ * Pxp_reader.resolve_as_file with ISO-8859-1 as system encoding.
+ *)
+class resolve_as_file the_warner =
+  object
+    val warner = the_warner
+    val pxp_resolver =
+      new Pxp_reader.resolve_as_file ~system_encoding:`Enc_iso88591 ()
+
+    initializer
+      let pxp_warner =
+        (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
+      in
+      pxp_resolver # init_warner pxp_warner;
+      pxp_resolver # init_rep_encoding `Enc_iso88591
+
+    (* All methods delegate to the wrapped PXP resolver. *)
+    method clone =
+      ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
+
+    method change_encoding enc = pxp_resolver # change_encoding enc
+    method open_in xid = pxp_resolver # open_in xid
+
+    method close_in = pxp_resolver # close_all   (* sic! *)
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli
new file mode 100644
index 000000000..8e5e2c8fc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli
@@ -0,0 +1,141 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_reader.mli.
+ * It corresponds to revision 1.3 of markup_reader.mli.
+ *)
+
+open Markup_types;;
+
+
+(* The class type resolver is the official type of all "resolvers".
+ * Resolvers get file names (or better, external identifiers) and
+ * return lexbufs, scanning the file for tokens. Resolvers may be
+ * cloned, and clones can interpret relative file names relative to
+ * their creator.
+ *)
+
+class type resolver =
+ object
+ (* A resolver can open a character source, and returns this source as
+ * Lexing.lexbuf.
+ * The resolver should recode the source into ISO-8859-1. By default,
+ * a resolver should assume UTF-8 or UTF-16 encoding. Before
+ * 'change_encoding' is invoked, the resolver should only return
+ * lexbufs with one character. After 'change_encoding' has been invoked,
+ * there is no character limit anymore.
+ * 'change_encoding' can only be invoked once. This method is usually
+ * called after the <?xml ... ?> prolog of the entity has been read.
+ * If this method is not called, it is up to the resolver to find out
+ * if UTF-8 or UTF-16 is used. It is recommended to invoke this method
+ * with an empty string to indicate this situation.
+ *)
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+ (* Note: the classes of this emulation implement close_in by calling
+ * the method close_all of the underlying PXP resolver. *)
+ (* Every resolver can be cloned. The clone does not inherit the connection
+ * with the external object, i.e. it is closed.
+ *)
+ method clone : resolver
+
+ end
+;;
+
+
+(* The following class is the current main implementation of resolvers.
+ * It fetches strings from an arbitrary source (by calling init_in, and
+ * then repeatedly next_string), recodes them to ISO-8859-1, and creates
+ * lexbufs for them.
+ * It is not complete, as the source is missing.
+ *
+ * Note that 'resolve_general' may change in future revisions; it is ugly.
+ *)
+
+(* -- This API simulation does not provide 'resolve_general' any longer
+
+class virtual resolve_general :
+ collect_warnings ->
+ object
+ val mutable encoding : string
+ val mutable encoding_requested : bool
+ val warner : collect_warnings
+
+ method clone : resolver
+
+ method private warn : int -> unit
+ method private autodetect : string -> unit
+
+ method private virtual next_string : string -> int -> int -> int
+ method private virtual init_in : ext_id -> unit
+ method virtual close_in : unit
+
+ method open_in : ext_id -> Lexing.lexbuf
+
+ method change_encoding : string -> unit
+ end
+*)
+
+
+(* The next classes are resolvers for concrete input sources. *)
+
+class resolve_read_channel :
+ in_channel -> collect_warnings -> resolver;;
+
+ (* Reads from the passed channel (it may be even a pipe). Note that this
+ * resolver cannot handle file inclusions, as it is pre-bound to a
+ * specific channel and is not able to interpret file names.
+ * That means, if there is an entity reference (something like &name; or
+ * %name;) to parse, and the definition points to another file, the
+ * resolver will fail.
+ *)
+
+
+class resolve_read_string :
+ string -> resolver;;
+
+ (* Reads from the passed string. As 'resolve_read_channel', this
+ * resolver cannot handle file inclusions. Warnings are dropped.
+ *)
+
+
+class resolve_as_file :
+ collect_warnings -> resolver;;
+
+ (* Reads from the local file system. Every file name is interpreted as
+ * file name of the local file system, and the referred file is read.
+ * This resolver can handle file inclusions as long as they do not
+ * exceed the scope of the local file system (i.e. no URLs).
+ * The argument is the object collecting the warnings. *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/03/13 23:41:54 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml
new file mode 100644
index 000000000..a0c0c271b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml
@@ -0,0 +1,103 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+
+type ext_id = Pxp_types.ext_id = (* re-export: the constructors must mirror Pxp_types.ext_id *)
+ System of string
+ | Public of (string * string)
+ | Anonymous
+type dtd_id = Pxp_types.dtd_id= (* re-export of Pxp_types.dtd_id *)
+ External of ext_id
+ | Derived of ext_id
+ | Internal
+type content_model_type = Pxp_types.content_model_type = (* re-export; recursive group of three types *)
+ Unspecified
+ | Empty
+ | Any
+ | Mixed of mixed_spec list
+ | Regexp of regexp_spec
+and mixed_spec = Pxp_types.mixed_spec = (* members of a Mixed content model *)
+ MPCDATA
+ | MChild of string
+and regexp_spec = Pxp_types.regexp_spec = (* tree form of a Regexp content model *)
+ Optional of regexp_spec
+ | Repeated of regexp_spec
+ | Repeated1 of regexp_spec
+ | Alt of regexp_spec list
+ | Seq of regexp_spec list
+ | Child of string
+type att_type = Pxp_types.att_type = (* re-export of Pxp_types.att_type *)
+ A_cdata
+ | A_id
+ | A_idref
+ | A_idrefs
+ | A_entity
+ | A_entities
+ | A_nmtoken
+ | A_nmtokens
+ | A_notation of string list (* presumably the allowed notation names - confirm in PXP *)
+ | A_enum of string list (* presumably the allowed tokens - confirm in PXP *)
+type att_default = Pxp_types.att_default = (* re-export of Pxp_types.att_default *)
+ D_required
+ | D_implied
+ | D_default of string
+ | D_fixed of string
+type att_value = Pxp_types.att_value = (* re-export of Pxp_types.att_value *)
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+
+class collect_warnings = (* stores every warning as one "WARNING: ..." line *)
+  object
+    val mutable w = Buffer.create 100
+    method warn s =
+      Buffer.add_string w "WARNING: ";
+      Buffer.add_string w s;
+      Buffer.add_char w '\n'
+    method reset = Buffer.clear w
+    method print_warnings = Buffer.contents w
+  end
+
+exception Illegal_character of int (* defined locally: PXP no longer has it (see the log below) *)
+exception Validation_error = Pxp_types.Validation_error (* the following ones are aliases *)
+exception WF_error = Pxp_types.WF_error
+exception Character_not_supported = Pxp_types.Character_not_supported
+exception Bad_character_stream = Netconversion.Malformed_code (* alias of a Netconversion exception *)
+exception At = Pxp_types.At
+exception Undeclared = Pxp_types.Undeclared
+
+let string_of_exn = Pxp_types.string_of_exn (* the PXP function, reused unchanged *)
+
+type output_stream = Pxp_types.output_stream = (* re-export of Pxp_types.output_stream *)
+ Out_buffer of Buffer.t
+ | Out_channel of out_channel
+ | Out_function of (string -> int -> int -> unit)
+
+let write = Pxp_types.write (* the PXP function, reused unchanged *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/08/18 20:19:16 gerd
+ * Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.4 2000/07/16 18:30:15 gerd
+ * Updated because PXP does no longer have the exception
+ * Illegal_character.
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli
new file mode 100644
index 000000000..b33bb30b2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli
@@ -0,0 +1,125 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_types.mli.
+ * It corresponds to revision 1.7 of markup_types.mli.
+ *)
+
+
+type ext_id = Pxp_types.ext_id = (* equal to the PXP type, so values can be passed to PXP as they are *)
+ System of string
+ | Public of (string * string)
+ | Anonymous
+type dtd_id = Pxp_types.dtd_id = (* equal to Pxp_types.dtd_id *)
+ External of ext_id
+ | Derived of ext_id
+ | Internal
+type content_model_type = Pxp_types.content_model_type = (* equal to the PXP type *)
+ Unspecified
+ | Empty
+ | Any
+ | Mixed of mixed_spec list
+ | Regexp of regexp_spec
+and mixed_spec = Pxp_types.mixed_spec = (* members of a Mixed content model *)
+ MPCDATA
+ | MChild of string
+and regexp_spec = Pxp_types.regexp_spec = (* tree form of a Regexp content model *)
+ Optional of regexp_spec
+ | Repeated of regexp_spec
+ | Repeated1 of regexp_spec
+ | Alt of regexp_spec list
+ | Seq of regexp_spec list
+ | Child of string
+type att_type = Pxp_types.att_type = (* equal to Pxp_types.att_type *)
+ A_cdata
+ | A_id
+ | A_idref
+ | A_idrefs
+ | A_entity
+ | A_entities
+ | A_nmtoken
+ | A_nmtokens
+ | A_notation of string list (* presumably the allowed notation names - confirm in PXP *)
+ | A_enum of string list (* presumably the allowed tokens - confirm in PXP *)
+type att_default = Pxp_types.att_default = (* equal to Pxp_types.att_default *)
+ D_required
+ | D_implied
+ | D_default of string
+ | D_fixed of string
+type att_value = Pxp_types.att_value = (* equal to Pxp_types.att_value *)
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+
+class collect_warnings : (* collects warnings in an internal buffer *)
+ object
+ method warn : string -> unit (* appends the line "WARNING: " followed by the string *)
+ method print_warnings : string (* returns all lines collected so far; prints nothing *)
+ method reset : unit (* empties the buffer *)
+ end
+;;
+
+
+exception Illegal_character of int (* only declared by this layer; it is not a PXP exception *)
+exception Validation_error of string (* same exception as Pxp_types.Validation_error *)
+exception WF_error of string (* same exception as Pxp_types.WF_error *)
+exception Character_not_supported (* same as Pxp_types.Character_not_supported *)
+exception Bad_character_stream (* same as Netconversion.Malformed_code *)
+exception At of (string * exn) (* same as Pxp_types.At *)
+exception Undeclared (* same as Pxp_types.Undeclared *)
+
+val string_of_exn : exn -> string
+ (* Converts a Markup exception into a readable string *)
+
+
+type output_stream = Pxp_types.output_stream = (* equal to Pxp_types.output_stream *)
+ Out_buffer of Buffer.t
+ | Out_channel of out_channel
+ | Out_function of (string -> int -> int -> unit)
+
+val write : output_stream -> string -> int -> int -> unit
+ (* write os s pos len: Writes the string to the buffer/channel/stream *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.7 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5 2000/05/01 20:43:25 gerd
+ * New type output_stream; new function 'write'.
+ *
+ * Revision 1.4 1999/09/01 16:25:35 gerd
+ * Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3 1999/08/15 02:22:40 gerd
+ * Added exception Undeclared.
+ *
+ * Revision 1.2 1999/08/14 22:15:17 gerd
+ * New class "collect_warnings".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml
new file mode 100644
index 000000000..26c40de18
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml
@@ -0,0 +1,245 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+open Markup_types
+open Markup_dtd
+open Markup_document
+
+type config = (* the markup-0.2.10 configuration record; see markup_yacc.mli *)
+ { warner : collect_warnings;
+ errors_with_line_numbers : bool;
+ processing_instructions_inline : bool; (* mapped to enable_pinstr_nodes of PXP *)
+ virtual_root : bool; (* mapped to enable_super_root_node of PXP *)
+ debugging_mode : bool;
+ }
+
+
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
+ | Channel of in_channel
+ | File of string
+ | Latin1 of string
+ | ExtID of (ext_id * Markup_reader.resolver)
+
+type 'ext domspec =
+ { map : (node_type, 'ext node) Hashtbl.t; (* exemplars by node type *)
+ default_element : 'ext node; (* exemplar for the remaining elements *)
+ }
+
+
+(* The default extension: it only remembers the node it belongs to. *)
+class default_ext =
+  object
+    val mutable node = (None : ('a extension node as 'a) option)
+
+    method set_node n = node <- Some n
+
+    (* Raises Assert_failure if set_node has not been called before. *)
+    method node =
+      match node with Some n -> n | None -> assert false
+    method clone = {< >}
+  end
+;;
+
+
+(* The shared exemplar of the default extension. *)
+let default_extension = new default_ext;;
+
+(* Line numbers on; PI inline mode, virtual root and debugging off. *)
+let default_config =
+  { debugging_mode = false;
+    virtual_root = false;
+    processing_instructions_inline = false;
+    errors_with_line_numbers = true; warner = new collect_warnings }
+
+
+let default_dom =
+  let data_exemplar = new data_impl default_extension "" in
+  let exemplars = Hashtbl.create 2 in
+  Hashtbl.add exemplars T_data data_exemplar;
+  (* only T_data is mapped; elements use the default element exemplar *)
+  { default_element = new element_impl default_extension; map = exemplars }
+;;
+
+
+(* Maps a Markup config onto a PXP config (always ISO-8859-1). *)
+let pxp_config cfg =
+  let pxp_warner = (cfg.warner :> Pxp_types.collect_warnings) in
+  { Pxp_yacc.default_config with
+      Pxp_yacc.encoding = `Enc_iso88591;
+      Pxp_yacc.recognize_standalone_declaration = false;
+      Pxp_yacc.debugging_mode = cfg.debugging_mode;
+      Pxp_yacc.enable_super_root_node = cfg.virtual_root;
+      Pxp_yacc.enable_pinstr_nodes = cfg.processing_instructions_inline;
+      Pxp_yacc.errors_with_line_numbers = cfg.errors_with_line_numbers;
+      Pxp_yacc.warner = pxp_warner };;
+
+
+(* Adapter in the opposite direction: lets the Markup resolver [r] be
+ * used where PXP expects a Pxp_reader.resolver.
+ *)
+class pxp_resolver r =
+  object
+    val markup_resolver = r
+
+    (* The representation encoding is always ISO-8859-1. *)
+    method rep_encoding = `Enc_iso88591
+    method init_rep_encoding enc = assert (enc = `Enc_iso88591)
+
+    (* The warner passed by PXP is ignored. *)
+    method init_warner _w = ()
+
+    method open_in xid = markup_resolver # open_in xid
+
+    method change_encoding enc = markup_resolver # change_encoding enc
+
+    (* A Markup resolver has only close_in, so both PXP close methods
+     * are mapped to it. *)
+    method close_in = markup_resolver # close_in
+
+    method close_all = markup_resolver # close_in
+
+    method clone =
+      ( {< markup_resolver = markup_resolver # clone >}
+        : #Pxp_reader.resolver :> Pxp_reader.resolver )
+  end
+;;
+
+
+(* Converts a Markup source into the corresponding PXP source. *)
+let pxp_source = function
+  | Latin1 s -> Pxp_yacc.from_string ~fixenc:`Enc_iso88591 s
+  | File f -> Pxp_yacc.from_file ~system_encoding:`Enc_iso88591 f
+  | Channel ch -> Pxp_yacc.from_channel ~system_encoding:`Enc_iso88591 ch
+  (* user-supplied resolvers are wrapped into the adapter class *)
+  | ExtID (id, res) -> Pxp_yacc.ExtID (id, new pxp_resolver res)
+  | Entity (mkent, res) ->
+      Pxp_yacc.Entity (mkent, new pxp_resolver res)
+;;
+
+
+(* Builds the PXP document model specification from a Markup domspec.
+ * The exemplars registered under the reserved element names "-vr" and
+ * "-pi" become the super root and the processing instruction exemplars;
+ * all other T_element entries go into the element mapping.
+ * Fails with Assert_failure if dom.map has no T_data exemplar.
+ *)
+let pxp_dom dom =
+  let dex =
+    try Hashtbl.find dom.map T_data with Not_found -> assert false in
+  let eex = dom.default_element in
+
+  (* Element mapping: element name -> PXP exemplar. *)
+  let m = Hashtbl.create 100 in
+  let register nt ex =
+    match nt with
+      T_element ("-vr" | "-pi") -> ()
+    | T_element name -> Hashtbl.add m name (ex # pxp_node)
+    | _ -> ()
+  in
+  Hashtbl.iter register dom.map;
+
+  (* Optional exemplar stored under a reserved element name. *)
+  let reserved name =
+    try Some ((Hashtbl.find dom.map (T_element name)) # pxp_node)
+    with Not_found -> None
+  in
+  let srex = reserved "-vr" in
+  let piex = reserved "-pi" in
+  Pxp_document.make_spec_from_mapping
+    ?super_root_exemplar:srex
+    ?default_pinstr_exemplar:piex
+    ~data_exemplar:(dex # pxp_node)
+    ~default_element_exemplar:(eex # pxp_node)
+    ~element_mapping:m
+    ()
+;;
+
+
+(* Wraps the PXP document [doc] into a Markup document using warner [w].
+ * The index is handed over to the extension of the root node. *)
+let markup_document w index doc =
+  let mdoc = new document w in
+  mdoc # init_xml_version (doc # xml_version);
+  mdoc # init_xml_standalone (doc # xml_standalone);
+  let root_ext = doc # root # extension in
+  root_ext # set_index index;
+  mdoc # init_root (root_ext # markup_node);
+  (* copy the processing instructions stored in the PXP document *)
+  let copy_pinstrs piname =
+    List.iter (fun pi -> mdoc # add_pinstr pi) (doc # pinstr piname)
+  in
+  List.iter copy_pinstrs (doc # pinstr_names);
+  mdoc
+;;
+
+
+
+(* Parses a DTD with PXP after translating config and source. *)
+let parse_dtd_entity cfg src =
+  let pxp_src = pxp_source src in
+  Pxp_yacc.parse_dtd_entity (pxp_config cfg) pxp_src
+;;
+
+
+(* Parses a whole document with PXP and wraps the result into a Markup
+ * document. The same index object is passed to the parser (~id_index)
+ * and to the resulting document. *)
+let parse_document_entity cfg src dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let pxp_doc =
+    Pxp_yacc.parse_document_entity
+      ~id_index:index (pxp_config cfg) (pxp_source src) (pxp_dom dom)
+  in
+  markup_document cfg.warner index pxp_doc
+;;
+
+
+(* Parses a fragment against [dtd] with PXP; returns the Markup node of
+ * the parsed root after attaching the index to its extension. *)
+let parse_content_entity cfg src dtd dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let pxp_root =
+    Pxp_yacc.parse_content_entity
+      ~id_index:index (pxp_config cfg) (pxp_source src) dtd (pxp_dom dom)
+  in
+  let ext = pxp_root # extension in
+  ext # set_index index;
+  ext # markup_node
+;;
+
+
+(* Parses a document with the well-formedness parser of PXP.
+ * Restriction: index is not filled! The index is not passed to the
+ * parser, only to the resulting Markup document. *)
+let parse_wf_entity cfg src dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let pxp_doc =
+    Pxp_yacc.parse_wfdocument_entity
+      (pxp_config cfg) (pxp_source src) (pxp_dom dom)
+  in
+  markup_document cfg.warner index pxp_doc
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:16 gerd
+ * Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli
new file mode 100644
index 000000000..daccad4c7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli
@@ -0,0 +1,233 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli.
+ * It corresponds to revision 1.4 of markup_yacc.mli.
+ *)
+
+
+(*$ markup-yacc.mli *)
+
+open Markup_types
+open Markup_dtd
+open Markup_document
+
+type config =
+ { warner : collect_warnings;
+ (* An object that collects warnings. *)
+
+ errors_with_line_numbers : bool;
+ (* Whether error messages contain line numbers or not. The parser
+ * is 10 to 20 per cent faster if line numbers are turned off;
+ * you get only character positions in this case.
+ *)
+
+ processing_instructions_inline : bool;
+ (* true: turns a special mode for processing instructions on. Normally,
+ * you cannot determine the exact location of a PI; you only know
+ * in which element the PI occurs. The "inline" mode makes it possible
+ * to find the exact location out: Every PI is artificially wrapped
+ * by a special element with name "-pi". For example, if the XML text
+ * is <a><?x?><?y?></a>, the parser normally produces only an element
+ * object for "a", and puts the PIs "x" and "y" into it (without
+ * order). In inline mode, the object "a" will contain two objects
+ * with name "-pi", and the first object will contain "x", and the
+ * second "y".
+ * Notes:
+ * (1) The name "-pi" is reserved. You cannot use it for your own
+ * tags because tag names must not begin with '-'.
+ * (2) You need not to add a declaration for "-pi" to the DTD. These
+ * elements are handled separately.
+ * (3) Of course, the "-pi" objects are created from exemplars of
+ * your DOM map.
+ *)
+
+ virtual_root : bool;
+ (* true: the topmost element of the XML tree is not the root element,
+ * but the so-called virtual root. The root element is a son of the
+ * virtual root. The virtual root is an ordinary element with name
+ * "-vr".
+ * The following behaviour changes, too:
+ * - PIs occurring outside the root element and outside the DTD are
+ * added to the virtual root instead of the document object
+ * - If processing_instructions_inline is also turned on, these PIs
+ * are added inline to the virtual root
+ * Notes:
+ * (1) The name "-vr" is reserved. You cannot use it for your own
+ * tags because tag names must not begin with '-'.
+ * (2) You need not to add a declaration for "-vr" to the DTD. These
+ * elements are handled separately.
+ * (3) Of course, the "-vr" objects are created from exemplars of
+ * your DOM map.
+ *)
+
+ (* The following options are not implemented, or only for internal
+ * use.
+ *)
+
+ debugging_mode : bool;
+ }
+
+
+type source = (* where to read from; see the note on sources below *)
+ Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
+ | Channel of in_channel (* document given as channel *)
+ | File of string (* document given by file name *)
+ | Latin1 of string (* document given as Latin 1-encoded string *)
+ | ExtID of (ext_id * Markup_reader.resolver)
+
+(* Note on sources:
+ *
+ * The sources do not have all the same capabilities. Here the differences:
+ *
+ * - File: A File source reads from a file by name. This has the advantage
+ * that references to external entities can be resolved. - The problem
+ * with SYSTEM references is that they usually contain relative file
+ * names; more exactly, a file name relative to the document containing it.
+ * It is only possible to convert such names to absolute file names if the
+ * name of the document containing such references is known; and File
+ * denotes this name.
+ *
+ * - Channel, Latin1: These sources read from documents given as channels or
+ * (Latin 1-encoded) strings. There is no file name, and because of this
+ * the documents must not contain references to external files (even
+ * if the file names are given as absolute names).
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ * entity to read from is passed to the resolver r as-is.
+ * The intention of this option is to allow customized
+ * resolvers to interpret external identifiers without any restriction.
+ * For example, you can assign the PUBLIC identifiers a meaning (they
+ * currently do not have any), or you can extend the "namespace" of
+ * identifiers.
+ * ExtID is the interface of choice for own extensions to resolvers.
+ *
+ * - Entity(m,r): You can implement every behaviour by using a customized
+ * entity class. Once the DTD object d is known that will be used during
+ * parsing, the entity e = m d is determined and used together with the
+ * resolver r.
+ * This is only for hackers.
+ *)
+
+
+type 'ext domspec =
+ { map : (node_type, 'ext node) Hashtbl.t; (* exemplars by node type *)
+ default_element : 'ext node; (* exemplar for elements not found in map *)
+ }
+ (* Specifies which node to use as exemplar for which node type. See the
+ * manual for explanations.
+ *)
+
+val default_config : config
+ (* - Warnings are collected by a fresh collect_warnings object
+ * - Error messages will contain line numbers
+ * - processing_instructions_inline, virtual_root, debugging_mode: off
+ * - The internal encoding is ISO-8859-1
+ * - The standalone declaration is not recognized (and thus not checked)
+ *)
+
+val default_extension : ('a node extension) as 'a
+ (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_dom : ('a node extension as 'a) domspec
+ (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+ (* Parse an entity containing a DTD (read from the source), and return this DTD. *)
+
+val parse_document_entity : config -> source -> 'ext domspec -> 'ext document
+ (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and validate the contents of the document against the DTD contained
+ * and/or referenced in the document.
+ *)
+
+val parse_content_entity : config ->
+ source ->
+ dtd ->
+ 'ext domspec ->
+ 'ext node
+ (* Parse a file representing a well-formed fragment of a document. The
+ * fragment must be a single element (i.e. something like <a>...</a>;
+ * not a sequence like <a>...</a><a>...</a>). The element is validated
+ * against the passed DTD, but it is not checked whether the element is
+ * the root element specified in the DTD.
+ * Note that you can create DTDs that specify not to validate at all
+ * (invoke method allow_arbitrary on the DTD).
+ *)
+
+val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document
+ (* Parse a closed document (see parse_document_entity), but do not
+ * validate it. Only checks on well-formedness are performed.
+ * Restriction of this emulation: the ID index is not filled. *)
+
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.4 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.3 2000/05/27 19:24:01 gerd
+ * New option: recognize_standalone_declaration.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ * Revision 1.9 2000/04/30 18:23:38 gerd
+ * New config options 'processing_instructions_inline' and
+ * 'virtual_root'.
+ *
+ * Revision 1.8 2000/03/13 23:46:46 gerd
+ * Change: The 'resolver' component of the 'config' type has
+ * disappeared. Instead, there is a new resolver component in the Entity
+ * and ExtID values of 'source'. I hope that this makes clearer that the
+ * resolver has only an effect if used together with Entity and ExtID
+ * sources.
+ * Change: The Entity value can now return the entity dependent
+ * on the DTD that is going to be used.
+ *
+ * Revision 1.7 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/02/22 01:52:45 gerd
+ * Added documentation.
+ *
+ * Revision 1.5 2000/01/20 20:54:43 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.4 1999/09/01 23:09:10 gerd
+ * New function parse_wf_entity that simulates a well-formedness
+ * parser.
+ *
+ * Revision 1.3 1999/09/01 16:26:36 gerd
+ * Added an empty line. This is *really* a big change.
+ *
+ * Revision 1.2 1999/08/14 22:20:27 gerd
+ * The "config" slot has now a component "warner"which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Furthermore, there is a new component "debugging_mode".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB
new file mode 100644
index 000000000..d942e2786
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB
@@ -0,0 +1,52 @@
+******************************************************************************
+ABOUT-FINDLIB - Package manager for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+The findlib library provides a scheme to manage reusable software components
+(packages), and includes tools that support this scheme. Packages are
+collections of OCaml modules for which metainformation can be stored. The
+packages are kept in the filesystem hierarchy, but with strict directory
+structure. The library contains functions to look up the directory that stores
+a package, to query metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows the user
+to enter queries on the command-line. In order to simplify compilation and
+linkage, there are new frontends of the various OCaml compilers that can
+directly deal with packages.
+
+Metainformation is stored together with the packages. This includes a version
+string, the archives the package consists of, and additional linker options.
+Packages can also be dependent on other packages. There is a query which finds
+out all predecessors of a list of packages and sorts them topologically. The
+new compiler frontends do this implicitly.
+
+Metainformation can be conditional, i.e. depend on a set of predicates. This is
+mainly used to be able to react to certain properties of the environment, such
+as if the bytecode or the native compiler is invoked, if the application is
+multi-threaded, and a few more. If the new compiler frontends are used, most
+predicates are found out automatically.
+
+There is special support for scripts. A new directive, "#require", loads
+packages into scripts. Of course, this works only with newly created toploops
+which include the findlib library.
+
+==============================================================================
+Where to get findlib
+==============================================================================
+
+The manual of findlib is available online [1]. You can download findlib here
+[2].
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml
new file mode 100644
index 000000000..d1dc5b04e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml
@@ -0,0 +1,61 @@
+
+
+%common;
+
+findlib">
+Findlib">
+
+]>
+
+
+
+ Abstract
+
+The &f; library provides a scheme to manage reusable software
+components (packages), and includes tools that support this
+scheme. Packages are collections of OCaml modules for which
+metainformation can be stored. The packages are kept in the filesystem
+hierarchy, but with strict directory structure. The library contains
+functions to look up the directory that stores a package, to query
+metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows
+the user to enter queries on the command-line. In order to simplify
+compilation and linkage, there are new frontends of the various OCaml
+compilers that can directly deal with packages.
+
+
+
+Metainformation is stored together with the packages. This includes a
+version string, the archives the package consists of, and additional
+linker options. Packages can also be dependent on other
+packages. There is a query which finds out all predecessors of a list
+of packages and sorts them topologically. The new compiler frontends
+do this implicitly.
+
+
+
+Metainformation can be conditional, i.e. depend on a set of
+predicates. This is mainly used to be able to react to certain
+properties of the environment, such as if the bytecode or the native
+compiler is invoked, if the application is multi-threaded, and a few
+more. If the new compiler frontends are used, most predicates are
+found out automatically.
+
+
+
+There is special support for scripts. A new directive, "#require",
+loads packages into scripts. Of course, this works only with newly
+created toploops which include the &f; library.
+
+
+
+
+ Where to get findlib
+
+The manual of &f; is available online .
+You can download &f; here .
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/EXTENSIONS b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS
new file mode 100644
index 000000000..a95683910
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS
@@ -0,0 +1,50 @@
+******************************************************************************
+Extensions of the XML specification
+******************************************************************************
+
+
+==============================================================================
+This document
+==============================================================================
+
+This parser has some options extending the XML specification. Here, the options
+are explained.
+
+==============================================================================
+Optional declarations instead of mandatory declarations
+==============================================================================
+
+The XML spec demands that elements, notations, and attributes must be declared.
+However, there are sometimes situations where a different rule would be better:
+If there is a declaration, the actual instance of the element type, notation
+reference or attribute must match the pattern of the declaration; but if the
+declaration is missing, a reasonable default declaration should be assumed.
+
+I have an example that seems to be typical: The inclusion of HTML into a meta
+language. Imagine you have defined some type of "generator" or other tool
+working with HTML fragments, and your document contains two types of elements:
+The generating elements (with a name like "gen:xxx"), and the object elements
+which are HTML. As HTML is still evolving, you do not want to declare the HTML
+elements; the HTML fragments should be treated as well-formed XML fragments. In
+contrast to this, the elements of the generator should be declared and
+validated because you can more easily detect errors.
+
+The following two processing instructions can be included into the DTD:
+
+-
+
+
+ References to unknown element types and notations no longer cause an error.
+ The element may contain everything, but it must be still well-formed. It may
+ have arbitrary attributes, and every attribute is treated as an #IMPLIED
+ CDATA attribute.
+
+-
+
+
+ References to unknown attributes inside one of the enumerated elements no
+ longer cause an error. Such an attribute is treated as an #IMPLIED CDATA
+ attribute.
+ If there are several "optional-attribute-declarations" PIs, they are all
+ interpreted (implicitly merged).
+
diff --git a/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml
new file mode 100644
index 000000000..e64d06152
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml
@@ -0,0 +1,62 @@
+
+
+%common;
+
+
+up'>
+
+
+%config;
+
+]>
+
+
+
+
+ This document
+ This parser has some options extending the XML specification. Here, the
+options are explained.
+
+
+
+
+ Optional declarations instead of mandatory declarations
+
+The XML spec demands that elements, notations, and attributes must be
+declared. However, there are sometimes situations where a different rule would
+be better: If there is a declaration, the actual instance of the
+element type, notation reference or attribute must match the pattern of the
+declaration; but if the declaration is missing, a reasonable default declaration
+should be assumed.
+
+I have an example that seems to be typical: The inclusion of HTML into a
+meta language. Imagine you have defined some type of "generator" or other tool
+working with HTML fragments, and your document contains two types of elements:
+The generating elements (with a name like "gen:xxx"), and the object elements
+which are HTML. As HTML is still evolving, you do not want to declare the HTML
+elements; the HTML fragments should be treated as well-formed XML fragments. In
+contrast to this, the elements of the generator should be declared and
+validated because you can more easily detect errors.
+
+The following two processing instructions can be included into the DTD:
+
+ ]]>
+ References to unknown element types and notations no longer cause an
+ error. The element may contain everything, but it must be still
+ well-formed. It may have arbitrary attributes, and every attribute is
+ treated as an #IMPLIED CDATA attribute.
+
+ ]]>
+ References to unknown attributes inside one of the enumerated elements
+ no longer cause an error. Such an attribute is treated as an #IMPLIED
+ CDATA attribute.
+
+
+If there are several "optional-attribute-declarations" PIs, they are all
+interpreted (implicitly merged).
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/INSTALL b/helm/DEVEL/pxp/pxp/doc/INSTALL
new file mode 100644
index 000000000..9a49a2217
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/INSTALL
@@ -0,0 +1,154 @@
+******************************************************************************
+INSTALL - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+The "pxp" package
+==============================================================================
+
+------------------------------------------------------------------------------
+Prerequisites
+------------------------------------------------------------------------------
+
+PXP requires that the netstring package [1] is already installed. PXP works
+only with O'Caml 3.00 (the support for 2.04 has been dropped). The installation
+procedure defined in the Makefile requires findlib [2] to work [3].
+
+------------------------------------------------------------------------------
+Configuration
+------------------------------------------------------------------------------
+
+It is not necessary to configure PXP; but you can switch off the UTF-8 support
+by setting the variable
+
+UTF8_SUPPORT = no
+
+in Makefile.conf. In this case, the UTF-8 modules are not even compiled. - By
+default, the UTF-8 support is enabled.
+
+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II; if
+this is too long, you can set UTF8_SUPPORT to "no".
+
+------------------------------------------------------------------------------
+Compilation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make all
+ compiles with the bytecode compiler and creates the files pxp_types.cma,
+ pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma, and pxp_utf8.cmo
+ (*). The (*) files are not built if the UTF-8 support is switched off.
+
+- make opt
+ compiles with the native compiler and creates the files pxp_types.cmxa,
+ pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa, and
+ pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support is
+ switched off.
+
+------------------------------------------------------------------------------
+Installation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make install
+ installs the bytecode archives, the interface definitions, and if present,
+ the native archives in the default location of findlib as package "pxp"
+
+- make uninstall
+ removes the package "pxp"
+
+- make markup-install
+ installs the Markup compatibility API as package "markup"
+
+- make markup-uninstall
+ removes the package "markup"
+
+------------------------------------------------------------------------------
+Usage with the help of "findlib"
+------------------------------------------------------------------------------
+
+You can refer to the parser as the findlib package "pxp":
+
+ocamlfind ocamlc -package pxp ...
+
+By default, the UTF-8 support modules will be linked in. If you do not need
+them, you may define the predicate "pxp_without_utf8", which prevents the
+UTF-8-related parts from being linked with your program; the difference in size
+is about 1 MB:
+
+ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
+
+Note that you can also reduce the size of the resulting executable by
+specifying Netstring-related predicates (e.g. netstring_only_iso); see the
+documentation of Netstring.
+
+------------------------------------------------------------------------------
+Linking with the archives directly
+------------------------------------------------------------------------------
+
+If you need UTF-8 support, you must link your program as follows:
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma
+ pxp_engine.cma pxp_utf8.cmo ...
+
+If you do not need UTF-8, the following suffices:
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
+
+
+
+==============================================================================
+The examples
+==============================================================================
+
+In the "examples" directory you find several applications of PXP. They require
+that PXP has been installed using findlib. See the Makefiles in the directories
+for descriptions of "make" goals.
+
+==============================================================================
+Troubleshooting
+==============================================================================
+
+------------------------------------------------------------------------------
+Solaris
+------------------------------------------------------------------------------
+
+The "make" utility of Solaris does not work properly; there is a bug in
+it that prevents the so-called suffix rules from being recognized. There are
+two solutions:
+
+- Install GNU make and use it instead of Solaris make. This is the recommended
+ way to solve the problem, as GNU make can process almost every Makefile from
+ open source projects, and you will never have problems with building
+ software again.
+
+- Add the following lines to Makefile.code:
+
+ %.cmx: %.ml
+ $(OCAMLOPT) -c $<
+
+ %.cmo: %.ml
+ $(OCAMLC) -c $<
+
+ %.cmi: %.mli
+ $(OCAMLC) -c $<
+
+ %.ml: %.mll
+ ocamllex $<
+
+
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[2] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[3] Findlib is a package manager, see the file ABOUT-FINDLIB.
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/INSTALL.xml b/helm/DEVEL/pxp/pxp/doc/INSTALL.xml
new file mode 100644
index 000000000..ac7832dbb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/INSTALL.xml
@@ -0,0 +1,171 @@
+
+
+%common;
+
+PXP">
+
+]>
+
+
+ The "pxp" package
+ Prerequisites
+
+&m; requires that the netstring package
+ is already installed. &m; works
+only with O'Caml 3.00 (the support for 2.04 has been dropped).
+The installation
+procedure defined in the Makefile requires findlib to workFindlib is a
+package manager, see the file ABOUT-FINDLIB. .
+
+
+
+ Configuration
+
+It is not necessary to configure PXP; but you can switch off the UTF-8
+support by setting the variable
+
+
+UTF8_SUPPORT = no
+
+
+in Makefile.conf. In this case, the UTF-8 modules are not even compiled.
+- By default, the UTF-8 support is enabled.
+
+
+
+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II;
+if this is too long, you can set UTF8_SUPPORT to "no".
+
+
+ Compilation
+
+The Makefile defines the following goals:
+
+
+
+ make all
+ compiles with the bytecode compiler and creates the files
+pxp_types.cma, pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma,
+and pxp_utf8.cmo (*). The (*) files are not built if the UTF-8 support
+is switched off.
+
+
+ make opt
+ compiles with the native compiler and creates the files
+pxp_types.cmxa, pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa,
+and pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support
+is switched off.
+
+
+
+
+ Installation
+
+The Makefile defines the following goals:
+
+
+ make install
+ installs the bytecode archives, the interface definitions, and if
+present, the native archives in the default location of findlib as
+package "pxp"
+
+
+
+ make uninstall
+ removes the package "pxp"
+
+
+ make markup-install
+ installs the Markup compatibility API as package "markup"
+
+
+ make markup-uninstall
+ removes the package "markup"
+
+
+
+
+
+ Usage with the help of "findlib"
+ You can refer to the parser as the findlib package "pxp":
+
+
+ocamlfind ocamlc -package pxp ...
+
+
+By default, the UTF-8 support modules will be linked in. If you do not need
+them, you may define the predicate "pxp_without_utf8", which prevents the
+UTF-8-related parts from being linked with your program; the difference in size
+is about 1 MB:
+
+
+ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
+
+
+Note that you can also reduce the size of the resulting executable by
+specifying Netstring-related predicates (e.g. netstring_only_iso); see the
+documentation of Netstring.
+
+
+
+
+ Linking with the archives directly
+ If you need UTF-8 support, you must link your program as follows:
+
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma
+ pxp_engine.cma pxp_utf8.cmo ...
+
+
+If you do not need UTF-8, the following suffices:
+
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
+
+
+
+
+
+
+
+ The examples
+
+In the "examples" directory you find several applications of &m;. They require
+that &m; has been installed using findlib . See the Makefiles in the
+directories for descriptions of "make" goals.
+
+
+
+ Trouble shooting
+ Solaris
+
+The "make" utility of Solaris does not work properly; there is a bug
+in it that prevents the so-called suffix rules from being recognized. There
+are two solutions:
+
+ Install GNU make and use it instead of Solaris make. This is
+the recommended way to solve the problem, as GNU make can process almost
+every Makefile from open source projects, and you will never have problems
+with building software again.
+ Add the following lines to Makefile.code:
+
+%.cmx: %.ml
+ $(OCAMLOPT) -c $<
+
+%.cmo: %.ml
+ $(OCAMLC) -c $<
+
+%.cmi: %.mli
+ $(OCAMLC) -c $<
+
+%.ml: %.mll
+ ocamllex $<
+
+
+
+
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/Makefile b/helm/DEVEL/pxp/pxp/doc/Makefile
new file mode 100644
index 000000000..0ed12741c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/Makefile
@@ -0,0 +1,43 @@
+.PHONY: all
+all: README INSTALL ABOUT-FINDLIB SPEC PRERELEASE EXTENSIONS
+
+README: README.xml common.xml config.xml
+ readme -text README.xml >README
+
+INSTALL: INSTALL.xml common.xml config.xml
+ readme -text INSTALL.xml >INSTALL
+
+ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
+ readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
+
+SPEC: SPEC.xml common.xml config.xml
+ readme -text SPEC.xml >SPEC
+
+EXTENSIONS: EXTENSIONS.xml common.xml config.xml
+ readme -text EXTENSIONS.xml >EXTENSIONS
+
+PRERELEASE: PRERELEASE.xml common.xml config.xml
+ readme -text PRERELEASE.xml >PRERELEASE
+
+config.xml:
+ touch config.xml
+
+common.xml:
+ ln -s dist-common.xml common.xml
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C manual CLEAN
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ $(MAKE) -C manual distclean
+
+.PHONY: symlinks
+symlinks:
+ ln -s ../examples/readme/readme.dtd .
+
diff --git a/helm/DEVEL/pxp/pxp/doc/PRERELEASE b/helm/DEVEL/pxp/pxp/doc/PRERELEASE
new file mode 100644
index 000000000..bc46cd059
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/PRERELEASE
@@ -0,0 +1,103 @@
+******************************************************************************
+README - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Pre-release of PXP, the XML parser for O'Caml
+==============================================================================
+
+PXP is the new, completely revised and partly rewritten validating XML parser
+for O'Caml; the old name, "Markup", has been dropped. The current version of
+PXP is still a bit experimental because it is not fully tested; however, it is
+now stable enough to be used in experimental applications.
+
+PXP will retain most parts of Markup's API; the name PXP emphasizes the
+strengths of the API: it is the Polymorphic XML Parser. The document objects
+representing the parsed file have an interesting polymorphism which allows that
+the user of the parser can control which kind of objects are actually created.
+The current API supports the element type as criterion for object/class
+selection; future APIs will extend this concept such that arbitrary criteria
+are possible (e.g. you may want to have different classes for different
+namespaces).
+
+The current development goals of PXP are:
+
+- Full XML-1.0 conformance: The current pre-release is now very close to
+ strict XML-1.0 conformance. The only bigger difference to the standard is
+ that PXP sometimes accepts DTDs as legal while the standard forbids them
+ (non-deterministic content models).
+ One of the more important improvements since 0.2.10 is the possibility to
+ represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
+ strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
+ preprocessor hiding the details of the UTF-8 encoding in the lexer
+ definitions.
+
+- Correctness of validation: The well-formedness and validity constraints
+ must be implemented as correct as possible. The last stable release had
+ already a regression test covering many aspects of XML. The test suite will
+ be extended.
+
+- Parsing performance: It should be possible to process large amounts of data
+ in a reasonable period of time. The last stable release had many stages of
+ processing that wasted time.
+ The current pre-release is already 30 per cent faster than 0.2.10.
+
+- Simplicity of usage: Unlike parsers based on imperative languages and DOM,
+ the usage of PXP should be simple, even for complex tasks. The current
+ parser API has already many advantages over DOM; especially it is well
+ integrated into the functional and object-oriented language O'Caml. You do
+ not have to deal with artificial representations like "node lists" while the
+ programming environment already provides good support for list structures.
+ The fact that O'Caml allows a functional programming style is interesting
+ for programs transforming XML trees.
+
+==============================================================================
+Download the PXP pre-release
+==============================================================================
+
+The current pre-release is available under
+http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz [1]. There is
+currently no documentation for this version of the software; it is recommended
+to use the Markup manual [2] and compare it with the current module interfaces.
+
+Please note that this is work in progress; it may still contain bugs and
+irregularities.
+
+The parser works only with OCaml-3. The parser needs the netstring package [3],
+at least version 0.9.1.
+
+I am very interested in your opinion of PXP; please contact me [4].
+
+==============================================================================
+Author, Credits, Copying
+==============================================================================
+
+PXP has been written by Gerd Stolpmann [5]; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like, you may use it even for
+commercial purposes as long as the license conditions are respected, see the
+file LICENSE coming with the distribution. It allows almost everything.
+
+==============================================================================
+Where to find the stable release
+==============================================================================
+
+Here. [6]
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz
+
+[2] see http://www.ocaml-programming.de/packages/documentation/markup/manual
+
+[3] see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[4] see mailto:gerd@gerd-stolpmann.de
+
+[5] see mailto:gerd@gerd-stolpmann.de
+
+[6] see http://www.ocaml-programming.de/packages/documentation/markup
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml b/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml
new file mode 100644
index 000000000..f155abd96
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml
@@ -0,0 +1,116 @@
+
+
+%common;
+
+
+up'>
+
+
+%config;
+
+]>
+
+
+
+ Pre-release of PXP, the XML parser for O'Caml
+
+ PXP is the new, completely revised and partly rewritten
+validating XML parser
+for O'Caml; the old name, "Markup", has been dropped. The current version
+of PXP is still a bit experimental because it is not fully tested; however,
+it is now stable enough to be used in experimental applications.
+
+
+ PXP will retain most parts of Markup's API; the name PXP
+emphasizes the strengths of the API: it is the Polymorphic XML Parser.
+The document objects representing the parsed file have an interesting
+polymorphism which allows that the user of the parser can control
+which kind of objects are actually created. The current API supports
+the element type as criterion for object/class selection; future APIs will
+extend this concept such that arbitrary criteria are possible
+(e.g. you may want to have different classes for different namespaces).
+
+
+ The current development goals of PXP are:
+
+
+ Full XML-1.0 conformance: The current pre-release
+is now very close to strict XML-1.0 conformance. The only bigger
+difference to the standard is that PXP sometimes accepts DTDs as legal
+while the standard forbids them (non-deterministic content models).
+
+One of the more important improvements since 0.2.10 is the possibility to
+represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
+strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
+preprocessor hiding the details of the UTF-8 encoding in the lexer definitions.
+
+
+
+ Correctness of validation: The well-formedness
+and validity constraints must be implemented as correctly as possible.
+The last stable release had already a regression test covering many
+aspects of XML. The test suite will be extended.
+
+
+ Parsing performance: It should be possible to
+process large amounts of data in a reasonable period of time. The last
+stable release had many stages of processing that wasted time.
+
+ The current pre-release is already 30 per cent faster than
+0.2.10.
+
+
+ Simplicity of usage: Unlike parsers basing on
+imperative languages and DOM, the usage of PXP should be simple, even
+for complex tasks. The current parser API has already many advantages
+over DOM; especially it is well integrated into the functional and
+object-oriented language O'Caml. You do not have to deal with
+artificial representations like "node lists" while the programming
+environment already provides good support for list structures. The
+fact that O'Caml allows a functional programming style is interesting
+for programs transforming XML trees.
+
+
+
+
+
+ Download the PXP pre-release
+
+ The current pre-release is available under
+
+&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz . There is currently no
+documentation for this version of the software; it is recommended to use the Markup manual and compare it with the current
+module interfaces.
+
+ Please note that this is work in progress; it may still contain bugs
+and irregularities.
+
+ The parser works only with OCaml-3. The parser needs the netstring package , at least version 0.9.1.
+
+
+ I am very interested in your opinion to PXP; please contact me .
+
+
+
+ Author, Credits, Copying
+
+PXP has been written by &person.gps;; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+
+
+
+
+ Where to find the stable release
+ Here.
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/README b/helm/DEVEL/pxp/pxp/doc/README
new file mode 100644
index 000000000..b7ad5de59
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/README
@@ -0,0 +1,247 @@
+******************************************************************************
+README - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+PXP is a validating parser for XML-1.0 which has been written entirely in
+Objective Caml.
+
+PXP is the new name of the parser formerly known as "Markup". PXP means
+"Polymorphic XML parser" and emphasizes its most useful property: that the API
+is polymorphic and can be configured such that different objects are used to
+store different types of elements.
+
+==============================================================================
+Download
+==============================================================================
+
+You can download PXP as gzip'ed tarball [1]. The parser needs the Netstring [2]
+package (0.9.3). Note that PXP requires O'Caml 3.00.
+
+==============================================================================
+User's Manual
+==============================================================================
+
+The manual is included in the distribution both as Postscript document and
+bunch of HTML files. An online version can be found here [3].
+
+==============================================================================
+Author, Credits, Copying
+==============================================================================
+
+PXP has been written by Gerd Stolpmann [4]; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like, you may use it even for
+commercial purposes as long as the license conditions are respected, see the
+file LICENSE coming with the distribution. It allows almost everything.
+
+Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug reports.
+
+==============================================================================
+Description
+==============================================================================
+
+PXP is a validating XML parser for O'Caml [5]. It strictly complies to the
+XML-1.0 [6] standard.
+
+The parser is simple to call, usually only one statement (function call) is
+sufficient to parse an XML document and to represent it as object tree.
+
+Once the document is parsed, it can be accessed using a class interface. The
+interface allows arbitrary access including transformations. One of the
+features of the document representation is its polymorphic nature; it is simple
+to add custom methods to the document classes. Furthermore, the parser can be
+configured such that different XML elements are represented by objects created
+from different classes. This is a very powerful feature, because it simplifies
+the structure of programs processing XML documents.
+
+Note that the class interface does not comply to the DOM standard. It was not a
+development goal to realize a standard API (industrial developers can do this much
+better than I); however, the API is powerful enough to be considered as
+equivalent with DOM. More important, the interface is compatible with the XML
+information model required by many XML-related standards.
+
+------------------------------------------------------------------------------
+Detailed feature list
+------------------------------------------------------------------------------
+
+- The XML instance is validated against the DTD; any violation of a validation
+ constraint leads to the rejection of the instance. The validator has been
+ carefully implemented, and conforms strictly to the standard. If needed, it
+ is also possible to run the parser in a well-formedness mode.
+
+- If possible, the validator applies a deterministic finite automaton to
+ validate the content models. This ensures that validation can always be
+ performed in linear time. However, in the case that the content models are
+ not deterministic, the parser uses a backtracking algorithm which can be
+ much slower. - It is also possible to reject non-deterministic content
+ models.
+
+- In particular, the validator also checks the complicated rules whether
+ parentheses are properly nested with respect to entities, and whether the
+ standalone declaration is satisfied. On demand, it is checked whether the
+ IDREF attributes only refer to existing nodes.
+
+- Entity references are automatically resolved while the XML text is being
+ scanned. It is not possible to recognize in the object tree where a
+ referenced entity begins or ends; the object tree only represents the
+ logical structure.
+
+- External entities are loaded using a configurable resolver infrastructure.
+ It is possible to connect the parser with an arbitrary XML source.
+
+- The parser can read XML text encoded in a variety of character sets.
+ Independent of this, it is possible to choose the encoding of the internal
+ representation of the tree nodes; the parser automatically converts the
+ input text to this encoding. Currently, the parser supports UTF-8 and
+ ISO-8859-1 as internal encodings.
+
+- The interface of the parser has been designed such that it is best
+ integrated into the language O'Caml. The first goal was simplicity of usage
+ which is achieved by many convenience methods and functions, and by allowing
+ the user to select which parts of the XML text are actually represented in
+ the tree. For example, it is possible to store processing instructions as
+ tree nodes, but the parser can also be configured such that these
+ instructions are put into hashtables. The information model is compatible
+ with the requirements of XML-related standards such as XPath.
+
+- In particular, the node tree can optionally contain or leave out processing
+ instructions and comments. It is also possible to generate a "super root"
+ object which is the parent of the root element. The attributes of elements
+ are normally not stored as nodes, but it is possible to get them wrapped
+ into nodes.
+
+- There is also an interface for DTDs; you can parse and access sequences of
+ declarations. The declarations are fully represented as recursive O'Caml
+ values.
+
+------------------------------------------------------------------------------
+Code examples
+------------------------------------------------------------------------------
+
+This distribution contains several examples:
+
+- validate: simply parses a document and prints all error messages
+
+- readme: Defines a DTD for simple "README"-like documents, and offers
+ conversion to HTML and text files [7].
+
+- xmlforms: This is already a sophisticated application that uses XML as style
+ sheet language and data storage format. It shows how a Tk user interface can
+ be configured by an XML style, and how data records can be stored using XML.
+
+------------------------------------------------------------------------------
+Restrictions and missing features
+------------------------------------------------------------------------------
+
+The following restrictions apply that are not violations of the standard:
+
+- The attributes "xml:space", and "xml:lang" are not supported specially. (The
+ application can do this.)
+
+- The built-in support for SYSTEM and PUBLIC identifiers is limited to local
+ file access. There is no support for catalogs. The parser offers a hook to
+ add missing features.
+
+- It is currently not possible to check for interoperability with SGML.
+
+The following features are also missing:
+
+- There is no special support for namespaces. (Perhaps in the next release?)
+
+- There is no support for XPATH or XSLT.
+
+However, I hope that these features will be implemented soon, either by myself
+or by contributors (who are invited to do so).
+
+------------------------------------------------------------------------------
+Recent Changes
+------------------------------------------------------------------------------
+
+- Changed in 1.0:
+ Support for document order.
+
+- Changed in 0.99.8:
+ Several fixes of bugs reported by Haruo Hosoya and Alain Frisch.
+ The class type "node" has been extended: you can go directly to the next and
+ previous nodes in the list; you can refer to nodes by position.
+ There are now some iterators for nodes: find, find_all, find_element,
+ find_all_elements, map_tree, iter_tree.
+ Experimental support for viewing attributes as nodes; I hope that helps
+ Alain writing his XPath evaluator.
+ The user's manual has been revised and is almost up to date.
+
+- Changed in 0.99.7:
+ There are now additional node types T_super_root, T_pinstr and T_comment,
+ and the parser is able to create the corresponding nodes.
+ The functions for character set conversion have been moved to the Netstring
+ package; they are not specific for XML.
+
+- Changed in 0.99.6:
+ Implemented a check on deterministic content models. Added an alternate
+ validator basing on a DFA. - This means that now all mandatory features for
+ an XML-1.0 parser are implemented! The parser is now substantially complete.
+
+- Changed in 0.99.5:
+ The handling of ID and IDREF attributes has changed. The index of nodes
+ containing an ID attribute is now separated from the document. Optionally
+ the parser now checks whether the IDREF attributes refer to existing
+ elements.
+ The element nodes can optionally store the location in the source XML code.
+ The method 'write' writes the XML tree in every supported encoding.
+ (Successor of 'write_compact_as_latin1'.)
+ Several smaller changes and fixes.
+
+- Changed in 0.99.4:
+ The module Pxp_reader has been modernized. The resolver classes are simpler
+ to use. There is now support for URLs.
+ The interface of Pxp_yacc has been improved: The type 'source' is now
+ simpler. The type 'domspec' has gone; the new 'spec' is opaque and performs
+ better. There are some new parsing modes.
+ Many smaller changes.
+
+- Changed in 0.99.3:
+ The markup_* modules have been renamed to pxp_*. There is a new
+ compatibility API that tries to be compatible with markup-0.2.10.
+ The type "encoding" is now a polymorphic variant.
+
+- Changed in 0.99.2:
+ Added checks for the constraints about the standalone declaration.
+ Added regression tests about attribute normalization, attribute checks,
+ standalone checks.
+ Fixed some minor errors of the attribute normalization function.
+ The bytecode/native archives are now separated in a general part, in a
+ ISO-8859-1-relevant part, and a UTF-8-relevant part. The parser can again be
+ compiled with ocamlopt.
+
+- Changed in 0.99.1:
+ In general, this release is an early pre-release of the next stable version
+ 1.00. I do not recommend to use it for serious work; it is still very
+ experimental!
+ The core of the parser has been rewritten using a self-written parser
+ generator.
+ The lexer has been restructured, and can now handle UTF-8 encoded files.
+ Numerous other changes.
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/pxp-1.0.tar.gz
+
+[2] see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[3] see http://www.ocaml-programming.de/packages/documentation/pxp/manual
+
+[4] see mailto:gerd@gerd-stolpmann.de
+
+[5] see http://caml.inria.fr/
+
+[6] see http://www.w3.org/TR/1998/REC-xml-19980210.html
+
+[7] This particular document is an example of this DTD!
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/README.xml b/helm/DEVEL/pxp/pxp/doc/README.xml
new file mode 100644
index 000000000..34c7726ad
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/README.xml
@@ -0,0 +1,423 @@
+
+
+
+
+
+
+Gerd Stolpmann'>
+-->
+
+
+%common;
+
+
+up'>
+
+
+%config;
+
+]>
+
+
+
+ Abstract
+
+PXP is a validating parser for XML-1.0 which has been written
+entirely in Objective Caml.
+
+
+ PXP is the new name of the parser formerly known as "Markup".
+PXP means "Polymorphic XML parser" and emphasizes its most useful
+property: that the API is polymorphic and can be configured such that
+different objects are used to store different types of elements.
+
+
+
+ Download
+
+You can download PXP as gzip'ed tarball . The parser needs the Netstring package (0.9.3). Note that PXP
+requires O'Caml 3.00.
+
+
+
+
+ User's Manual
+
+The manual is included in the distribution both as Postscript document and
+bunch of HTML files. An online version can be found here .
+
+
+
+
+ Author, Credits, Copying
+
+PXP has been written by &person.gps;; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+
+
+ Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug
+reports.
+
+
+
+ Description
+
+PXP is a validating XML parser for O'Caml . It strictly complies to the
+XML-1.0 standard.
+
+
+ The parser is simple to call, usually only one statement (function
+call) is sufficient to parse an XML document and to represent it as object
+tree.
+
+
+Once the document is parsed, it can be accessed using a class interface.
+The interface allows arbitrary access including transformations. One of
+the features of the document representation is its polymorphic nature;
+it is simple to add custom methods to the document classes. Furthermore,
+the parser can be configured such that different XML elements are represented
+by objects created from different classes. This is a very powerful feature,
+because it simplifies the structure of programs processing XML documents.
+
+
+
+Note that the class interface does not comply to the DOM standard. It was not a
+development goal to realize a standard API (industrial developers can do this much
+better than I); however, the API is powerful enough to be considered as
+equivalent with DOM. More important, the interface is compatible with the
+XML information model required by many XML-related standards.
+
+
+
+ Detailed feature list
+
+
+ The XML instance is validated against the DTD; any violation of
+a validation constraint leads to the rejection of the instance. The validator
+has been carefully implemented, and conforms strictly to the standard. If
+needed, it is also possible to run the parser in a well-formedness mode.
+
+ If possible, the validator applies a deterministic finite
+automaton to validate the content models. This ensures that validation can
+always be performed in linear time. However, in the case that the content
+models are not deterministic, the parser uses a backtracking algorithm which
+can be much slower. - It is also possible to reject non-deterministic content
+models.
+
+ In particular, the validator also checks the complicated rules
+whether parentheses are properly nested with respect to entities, and whether
+the standalone declaration is satisfied. On demand, it is checked whether the
+IDREF attributes only refer to existing nodes.
+
+ Entity references are automatically resolved while the XML text
+is being scanned. It is not possible to recognize in the object tree where a
+referenced entity begins or ends; the object tree only represents the logical structure.
+
+ External entities are loaded using a configurable resolver
+infrastructure. It is possible to connect the parser with an arbitrary XML source.
+
+ The parser can read XML text encoded in a variety of character
+sets. Independent of this, it is possible to choose the encoding of the
+internal representation of the tree nodes; the parser automatically converts
+the input text to this encoding. Currently, the parser supports UTF-8 and
+ISO-8859-1 as internal encodings.
+
+ The interface of the parser has been designed such that it is
+best integrated into the language O'Caml. The first goal was simplicity of
+usage which is achieved by many convenience methods and functions, and by
+allowing the user to select which parts of the XML text are actually
+represented in the tree. For example, it is possible to store processing
+instructions as tree nodes, but the parser can also be configured such that
+these instructions are put into hashtables. The information model is compatible
+with the requirements of XML-related standards such as XPath.
+
+ In particular, the node tree can optionally contain or leave out
+processing instructions and comments. It is also possible to generate a "super
+root" object which is the parent of the root element. The attributes of
+elements are normally not stored as nodes, but it is possible to get them
+wrapped into nodes.
+
+ There is also an interface for DTDs; you can parse and access
+sequences of declarations. The declarations are fully represented as recursive
+O'Caml values.
+
+
+
+
+
+
+
+ Code examples
+
+This distribution contains several examples:
+
+
+validate: simply parses a
+document and prints all error messages
+
+
+
+readme: Defines a DTD for simple "README"-like documents, and offers
+conversion to HTML and text filesThis particular document is an
+example of this DTD! .
+
+
+
+xmlforms: This is already a
+sophisticated application that uses XML as style sheet language and data
+storage format. It shows how a Tk user interface can be configured by an
+XML style, and how data records can be stored using XML.
+
+
+
+
+
+ Restrictions and missing features
+
+The following restrictions apply that are not violations of the standard:
+
+
+
+The attributes "xml:space", and "xml:lang" are not supported specially.
+ (The application can do this.)
+
+
+The built-in support for SYSTEM and PUBLIC identifiers is limited to
+ local file access. There is no support for catalogs. The parser offers
+ a hook to add missing features.
+
+
+It is currently not possible to check for interoperability with SGML.
+
+
+
+The following features are also missing:
+
+However, I hope that these features will be implemented soon, either by
+myself or by contributors (who are invited to do so).
+
+
+
+ Recent Changes
+
+
+ Changed in 1.0:
+ Support for document order.
+
+
+ Changed in 0.99.8:
+ Several fixes of bugs reported by Haruo Hosoya and Alain
+Frisch.
+ The class type "node" has been extended: you can go directly to
+the next and previous nodes in the list; you can refer to nodes by
+position.
+ There are now some iterators for nodes: find, find_all,
+find_element, find_all_elements, map_tree, iter_tree.
+ Experimental support for viewing attributes as nodes; I hope that
+helps Alain writing his XPath evaluator.
+ The user's manual has been revised and is almost up to date.
+
+
+ Changed in 0.99.7:
+ There are now additional node types T_super_root, T_pinstr and
+T_comment, and the parser is able to create the corresponding nodes.
+ The functions for character set conversion have been moved to
+the Netstring package; they are not specific for XML.
+
+
+ Changed in 0.99.6:
+ Implemented a check on deterministic content models. Added
+an alternate validator basing on a DFA. - This means that now all mandatory
+features for an XML-1.0 parser are implemented! The parser is now substantially
+complete.
+
+
+ Changed in 0.99.5:
+ The handling of ID and IDREF attributes has changed. The
+index of nodes containing an ID attribute is now separated from the document.
+Optionally the parser now checks whether the IDREF attributes refer to
+existing elements.
+ The element nodes can optionally store the location in the
+source XML code.
+ The method 'write' writes the XML tree in every supported
+encoding. (Successor of 'write_compact_as_latin1'.)
+ Several smaller changes and fixes.
+
+
+ Changed in 0.99.4:
+ The module Pxp_reader has been modernized. The resolver classes
+are simpler to use. There is now support for URLs.
+ The interface of Pxp_yacc has been improved: The type 'source'
+is now simpler. The type 'domspec' has gone; the new 'spec' is opaque and
+performs better. There are some new parsing modes.
+ Many smaller changes.
+
+
+ Changed in 0.99.3:
+ The markup_* modules have been renamed to pxp_*. There is a new
+compatibility API that tries to be compatible with markup-0.2.10.
+ The type "encoding" is now a polymorphic variant.
+
+
+ Changed in 0.99.2:
+ Added checks for the constraints about the standalone
+declaration.
+ Added regression tests about attribute normalization,
+attribute checks, standalone checks.
+ Fixed some minor errors of the attribute normalization
+function.
+ The bytecode/native archives are now separated in
+a general part, in a ISO-8859-1-relevant part, and a UTF-8-relevant
+part. The parser can again be compiled with ocamlopt.
+
+
+ Changed in 0.99.1:
+ In general, this release is an early pre-release of the
+next stable version 1.00. I do not recommend to use it for serious
+work; it is still very experimental!
+ The core of the parser has been rewritten using a self-written
+parser generator.
+ The lexer has been restructured, and can now handle UTF-8
+encoded files.
+ Numerous other changes.
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/SPEC b/helm/DEVEL/pxp/pxp/doc/SPEC
new file mode 100644
index 000000000..28e6914ce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/SPEC
@@ -0,0 +1,185 @@
+******************************************************************************
+Notes on the XML specification
+******************************************************************************
+
+
+==============================================================================
+This document
+==============================================================================
+
+There are some points in the XML specification which are ambiguous. The
+following notes discuss these points, and describe how this parser behaves.
+
+==============================================================================
+Conditional sections and the token ]]>
+==============================================================================
+
+It is unclear what happens if an ignored section contains the token ]]> at
+places where it is normally allowed, i.e. within string literals and comments,
+e.g.
+
+<![IGNORE[ <!-- ]]> --> ]]>
+
+On the one hand, the production rule of the XML grammar does not treat such
+tokens specially. Following the grammar, already the first ]]> ends the
+conditional section
+
+<![IGNORE[ <!-- ]]>
+
+and the other tokens are included into the DTD.
+
+On the other hand, we can read: "Like the internal and external DTD subsets, a
+conditional section may contain one or more complete declarations, comments,
+processing instructions, or nested conditional sections, intermingled with
+white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
+may contain ]]>, so this is contradictory to the grammar.
+
+The intention of conditional sections is to include or exclude the section
+depending on the current replacement text of a parameter entity. Almost always
+such sections are used as in
+
+<!ENTITY % want.a.feature.or.not "INCLUDE"> (or "IGNORE")
+<![ %want.a.feature.or.not; [ ... ]]>
+
+This means that if it is possible to include a section it must also be legal to
+ignore the same section. This is a strong indication that the token ]]> must
+not count as section terminator if it occurs in a string literal or comment.
+
+This parser implements the latter.
+
+==============================================================================
+Conditional sections and the inclusion of parameter entities
+==============================================================================
+
+It is unclear what happens if an ignored section contains a reference to a
+parameter entity. In most cases, this is not problematic because nesting of
+parameter entities must respect declaration braces. The replacement text of
+parameter entities must either contain a whole number of declarations or only
+inner material of one declaration. Almost always it does not matter whether
+these references are resolved or not (the section is ignored).
+
+But there is one case which is not explicitly specified: Is it allowed that the
+replacement text of an entity contains the end marker ]]> of an ignored
+conditional section? Example:
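+
+<!ENTITY % end "]]>">
+<![ IGNORE [ %end;
+
+We do not find the statement in the XML spec that the ]]> must be contained in
+the same entity as the corresponding <![ (as for the tokens <! and > of
+declarations). So it is possible to conclude that ]]> may be in another entity.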
+
+Of course, there are many arguments not to allow such constructs: The resulting
+code is incomprehensible, and parsing takes longer (especially if the entities
+are external). I think the best argument against this kind of XML is that the
+XML spec is not detailed enough, as it contains no rules where entity
+references should be recognized and where not. For example:
+
+<!ENTITY % y "]]>">
+<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>">
+<![ IGNORE [ %x; ]]>
+
+Which token ]]> counts? From a logical point of view, the ]]> in the third line
+ends the conditional section. As already pointed out, the XML spec permits the
+interpretation that ]]> is recognized even in string literals, and this may be
+also true if it is "imported" from a separate entity; and so the first ]]>
+denotes the end of the section.
+
+As a practical solution, this parser does not expand parameter entities in
+ignored sections. Furthermore, it is also not allowed that the ending ]]> of
+ignored or included sections is contained in a different entity than the
+starting <![ token.
+%ext;
+%ent;
+
+"ext" contains:
+
+">
+
+
+
+Here, the reference %ent; would be illegal if the standalone declaration is
+strictly interpreted. This parser handles the references %ent; and %ext;
+equivalently which means that %ent; is allowed, but the element type "el" is
+treated as externally declared.
+
+General entities can occur within the DTD, but they can only be contained in
+the default value of attributes, or in the definition of other general
+entities. The latter can be ignored, because the check will be repeated when
+the entities are expanded. Though, general entities occurring in default
+attribute values are actually checked at the moment when the default is used in
+an element instance.
+
+General entities occurring in the document body are always checked.
+
+NDATA entities can occur in ENTITY attribute values; either in the element
+instance or in the default declaration. Both cases are checked.
+
diff --git a/helm/DEVEL/pxp/pxp/doc/SPEC.xml b/helm/DEVEL/pxp/pxp/doc/SPEC.xml
new file mode 100644
index 000000000..906f45a79
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/SPEC.xml
@@ -0,0 +1,226 @@
+
+
+%common;
+
+
+up'>
+
+
+%config;
+
+]>
+
+
+
+
+ This document
+ There are some points in the XML specification which are ambiguous.
+The following notes discuss these points, and describe how this parser
+behaves.
+
+
+
+ Conditional sections and the token ]]>
+
+ It is unclear what happens if an ignored section contains the
+token ]]> at places where it is normally allowed, i.e. within string
+literals and comments, e.g.
+
+
+<![IGNORE[ <!-- ]]> --> ]]>
+
+
+On the one hand, the production rule of the XML grammar does not treat such
+tokens specially. Following the grammar, already the first ]]> ends
+the conditional section
+
+
+<![IGNORE[ <!-- ]]>
+
+
+and the other tokens are included into the DTD.
+
+On the other hand, we can read: "Like the internal and external DTD subsets,
+a conditional section may contain one or more complete declarations, comments,
+processing instructions, or nested conditional sections, intermingled with
+white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
+may contain ]]>, so this is contradictory to the grammar.
+
+The intention of conditional sections is to include or exclude the section
+depending on the current replacement text of a parameter entity. Almost
+always such sections are used as in
+
+
+<!ENTITY % want.a.feature.or.not "INCLUDE"> (or "IGNORE")
+<![ %want.a.feature.or.not; [ ... ]]>
+
+
+This means that if it is possible to include a section it must also be
+legal to ignore the same section. This is a strong indication that
+the token ]]> must not count as section terminator if it occurs
+in a string literal or comment.
+
+This parser implements the latter.
+
+
+
+
+ Conditional sections and the inclusion of parameter entities
+
+ It is unclear what happens if an ignored section contains a reference
+to a parameter entity. In most cases, this is not problematic because
+nesting of parameter entities must respect declaration braces. The
+replacement text of parameter entities must either contain a whole
+number of declarations or only inner material of one declaration. Almost always
+it does not matter whether these references are resolved or not
+(the section is ignored).
+
+ But there is one case which is not explicitly specified: Is it allowed
+that the replacement text of an entity contains the end marker ]]>
+of an ignored conditional section? Example:
+
+
+<!ENTITY % end "]]>">
+<![ IGNORE [ %end;
+
+
+We do not find the statement in the XML spec that the ]]> must be contained
+in the same entity as the corresponding <![ (as for the tokens <! and
+> of declarations). So it is possible to conclude that ]]> may be in
+another entity.
+
+ Of course, there are many arguments not to allow such constructs: The
+resulting code is incomprehensible, and parsing takes longer (especially if the
+entities are external). I think the best argument against this kind of XML
+is that the XML spec is not detailed enough, as it contains no rules where
+entity references should be recognized and where not. For example:
+
+
+<!ENTITY % y "]]>">
+<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>">
+<![ IGNORE [ %x; ]]>
+
+
+Which token ]]> counts? From a logical point of view, the ]]> in the
+third line ends the conditional section. As already pointed out, the XML spec
+permits the interpretation that ]]> is recognized even in string literals,
+and this may be also true if it is "imported" from a separate entity; and so
+the first ]]> denotes the end of the section.
+
+ As a practical solution, this parser does not expand parameter entities
+in ignored sections. Furthermore, it is also not allowed that the ending ]]>
+of ignored or included sections is contained in a different entity than the
+starting <![ token.
+
+
+
+
+ Standalone documents and attribute normalization
+
+
+If a document is declared as stand-alone, a restriction on the effect of
+attribute normalization takes effect for attributes declared in external
+entities. Normally, the parser knows the type of the attribute from
+the ATTLIST declaration, and it can normalize attribute values depending
+on their types. For example, an NMTOKEN attribute can be written with
+leading or trailing spaces, but the parser returns always the nmtoken
+without such added spaces; in contrast to this, a CDATA attribute is
+not normalized in this way. For stand-alone documents the type information is
+not available if the ATTLIST declaration is located in an external
+entity. Because of this, the XML spec demands that attribute values must
+be written in their normal form in this case, i.e. without additional
+spaces.
+
+ This parser interprets this restriction as follows. Obviously,
+the substitution of character and entity references is not considered
+as a "change of the value" as a result of the normalization, because
+these operations will be performed identically if the ATTLIST declaration
+is not available. The same applies to the substitution of TABs, CRs,
+and LFs by space characters. Only the removal of spaces depending on
+the type of the attribute changes the value if the ATTLIST is not
+available.
+
+ This means in detail: CDATA attributes never violate the
+stand-alone status. ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator
+attributes must not be written with leading and/or trailing spaces. IDREFS,
+ENTITIES, and NMTOKENS attributes must not be written with extra spaces at the
+beginning or at the end of the value, or between the tokens of the list.
+
+ The whole check is dubious, because the attribute type expresses also a
+semantical constraint, not only a syntactical one. At least this parser
+distinguishes strictly between single-value and list types, and returns the
+attribute values differently; the first are represented as Value s (where s is
+a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The
+internal representation of the value is dependent on the attribute type, too,
+such that even normalized values are processed differently depending on
+whether the attribute has list type or not. For this parser, it makes still a
+difference whether a value is normalized and processed as if it were CDATA, or
+whether the value is processed according to its declared type.
+
+ The stand-alone check is included to be able to make a statement
+whether other, well-formedness parsers can process the document. Of course,
+these parsers always process attributes as CDATA, and the stand-alone check
+guarantees that these parsers will always see the normalized values.
+
+
+
+
+ Standalone documents and the restrictions on entity
+references
+
+Stand-alone documents must not refer to entities which are declared in an
+external entity. This parser applies this rule only: to general and NDATA
+entities when they occur in the document body (i.e. not in the DTD); and to
+general and NDATA entities occurring in default attribute values declared in the
+internal subset of the DTD.
+
+
+Parameter entities are out of discussion for the stand-alone property. If there
+is a parameter entity reference in the internal subset which was declared in an
+external entity, it is not available in the same way as the external entity is
+not available that contains its declaration. Because of this "equivalence",
+parameter entity references are not checked on violations against the
+stand-alone declaration. It simply does not matter. - Illustration:
+
+
+
+Main document:
+
+
+%ext;
+%ent;
+]]>
+
+"ext" contains:
+
+ ">
+]]>
+
+
+ Here, the reference %ent; would be illegal if the standalone
+declaration is strictly interpreted. This parser handles the references
+%ent; and %ext; equivalently which means that %ent; is allowed, but the
+element type "el" is treated as externally declared.
+
+
+
+General entities can occur within the DTD, but they can only be contained in
+the default value of attributes, or in the definition of other general
+entities. The latter can be ignored, because the check will be repeated when
+the entities are expanded. Though, general entities occurring in default
+attribute values are actually checked at the moment when the default is
+used in an element instance.
+
+
+General entities occurring in the document body are always checked.
+
+NDATA entities can occur in ENTITY attribute values; either in the element
+instance or in the default declaration. Both cases are checked.
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/design.txt b/helm/DEVEL/pxp/pxp/doc/design.txt
new file mode 100644
index 000000000..bf75d0618
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/design.txt
@@ -0,0 +1,340 @@
+------------------------------------------------ -*- indented-text -*-
+Some Notes About the Design:
+----------------------------------------------------------------------
+
+----------------------------------------------------------------------
+Compilation
+----------------------------------------------------------------------
+
+Compilation is non-trivial because:
+
+ - The lexer and parser generators ocamllex resp. ocamlyacc normally
+ create code such that the parser module precedes the lexer module.
+ THIS design requires that the lexer layer precedes the entity layer
+ which precedes the parser layer, because the parsing results modify
+ the behaviour of the lexer and entity layers. There is no way to get
+ around this because of the nature of XML.
+
+ So the dependency relation of the lexer and the parser is modified;
+ in particular the "token" type that is normally defined by the
+ generated parser is moved to a common predecessor of both lexer
+ and parser.
+
+ - Another modification of the standard way of handling parsers is that
+ the parser is turned into an object. This is necessary because the
+ whole parser is polymorphic, i.e. there is a type parameter (the
+ type of the node extension).
+
+......................................................................
+
+First some modules are generated as illustrated by the following
+diagram:
+
+
+ markup_yacc.mly
+ | |
+ \|/ \|/ [ocamlyacc, 1]
+ V V
+ markup_yacc.mli markup_yacc.ml
+ | --> renamed into markup_yacc.ml0
+ [awk, 2] \|/ |
+ V \|/ [sed, 3]
+ markup_yacc_token.mlf V
+ | | markup_yacc.ml
+ markup_lexer_types_ | |
+ shadow.mli | | | markup_lexer_types_
+ \|/ [sed, \|/ | shadow.ml
+ V 4] V | |
+ markup_lexer_types.mli | | [sed, 4]
+ \|/ \|/
+ V V
+ markup_lexer_types.ml
+
+
+ markup_yacc_shadow.mli
+ |
+ \|/ [replaces, 5]
+ V
+ markup_yacc.mli
+
+
+
+ markup_lexers.mll
+ |
+ \|/ [ocamllex, 6]
+ V
+ markup_lexers.ml
+
+
+Notes:
+
+ (1) ocamlyacc generates both a module and a module interface.
+ The module is postprocessed in step (3). The interface cannot
+ be used, but it contains the definition of the "token" type.
+ This definition is extracted in step (2). The interface is
+ completely replaced in step (5) by a different file.
+
+ (2) An "awk" script extracts the definition of the type "token".
+ "token" is created by ocamlyacc upon the %token directives
+ in markup_yacc.mly, and normally "token" is defined in
+ the module generated by ocamlyacc. This turned out not to be
+ useful as the module dependency must be that the lexer is
+ an antecedent of the parser and not vice versa (as usual),
+ so the "token" type is "moved" to the module Markup_lexer_types
+ which is an antecedent of both the lexer and the parser.
+
+ (3) A "sed" script turns the generated parser into an object.
+ This is rather simple; some "let" definitions must be rewritten
+ as "val" definitions, the other "let" definitions as
+ "method" definitions. The parser object is needed because
+ the whole parser has a polymorphic type parameter.
+
+ (4) The implementation and definition of Markup_lexer_types are
+ both generated by inserting the "token" type definition
+ (in markup_yacc_token.mlf) into two pattern files,
+ markup_lexer_types_shadow.ml resp. -.mli. The point of insertion
+ is marked by the string INCLUDE_HERE.
+
+ (5) The generated interface of the Markup_yacc module is replaced
+ by a hand-written file.
+
+ (6) ocamllex generates the lexer; this process is not patched in any
+ way.
+
+......................................................................
+
+After the additional modules have been generated, compilation proceeds
+in the usual manner.
+
+
+----------------------------------------------------------------------
+Hierarchy of parsing layers:
+----------------------------------------------------------------------
+
+From top to bottom:
+
+ - Parser: Markup_yacc
+ + gets input stream from the main entity object
+ + checks most of the grammar
+ + creates the DTD object as side-effect
+ + creates the element tree as side-effect
+ + creates further entity objects that are entered into the DTD
+ - Entity layer: Markup_entity
+ + gets input stream from the lexers, or another entity object
+ + handles entity references: if a reference is encountered the
+ input stream is redirected such that the tokens come from the
+ referenced entity object
+ + handles conditional sections
+ - Lexer layer: Markup_lexers
+ + gets input from lexbuffers created by resolvers
+ + different lexers for different lexical contexts
+ + a lexer returns pairs (token,lexid), where token is the scanned
+ token, and lexid is the name of the lexer that must be used for
+ the next token
+ - Resolver layer: Markup_entity
+ + a resolver creates the lexbuf from some character source
+ + a resolver recodes the input and handles the encoding scheme
+
+----------------------------------------------------------------------
+The YACC based parser
+----------------------------------------------------------------------
+
+ocamlyacc allows passing an arbitrary 'next_token' function to the
+parsing functions. We always use 'en # next_token()' where 'en' is the
+main entity object representing the main file to be parsed.
+
+The parser is not functional, but uses mainly side-effects to accumulate
+the structures that have been recognized. This is very important for the
+entity definitions, because once an entity definition has been found there
+may be a reference to it which is handled by the entity layer (which is
+below the yacc layer). This means that such a definition modifies the
+token source of the parser, and this can only be handled by side-effects
+(at least in a sensible manner; a purely functional parser would have to
+pass unresolved entity references to its caller, which would have to
+resolve the reference and to re-parse the whole document!).
+
+Note that also element definitions profit from the imperative style of
+the parser; an element instance can be validated directly once the end
+tag has been read in.
+
+----------------------------------------------------------------------
+The entity layer
+----------------------------------------------------------------------
+
+The parser gets the tokens from the main entity object. This object
+controls the underlying lexing mechanism (see below), and already
+interprets the following:
+
+- Conditional sections (if they are allowed in this entity):
+ The structures <![INCLUDE[ ... ]]> and <![IGNORE[ ... ]]> are
+ recognized and interpreted.
+
+ This would be hard to realize by the yacc parser, because:
+ - INCLUDE and IGNORE are not recognized as lexical keywords but as names.
+ This means that the parser cannot select different rules for them.
+ - The text after IGNORE requires a different lexical handling.
+
+- Entity references: &name; and %name;
+ The named entity is looked up and the input source is redirected to it, i.e.
+ if the main entity object gets the message 'next_token' this message is
+ forwarded to the referenced entity. (This entity may choose to forward the
+ message again to a third entity, and so on.)
+
+ There are some fine points:
+
+ - It is okay that redirection happens at token level, not at character level:
+ + General entities must always match the 'content' production, and because
+ of this they must always consist of a whole number of tokens.
+ + If parameter entities are resolved, the XML specification states that
+ a space character is inserted before and after the replacement text.
+ This also means that such entities always consist of a whole number
+ of tokens.
+
+ - There are some "nesting constraints":
+ + General entities must match the 'content' production. Because of this,
+ the special token Begin_entity is inserted before the first token of
+ the entity, and End_entity is inserted just before the Eof token. The
+ brace Begin_entity...End_entity is recognized by the yacc parser, but
+ only in the 'content' production.
+ + External parameter entities must match 'extSubsetDecl'. Again,
+ Begin_entity and End_entity tokens embrace the inner token stream.
+ The brace Begin_entity...End_entity is recognized by the yacc parser
+ at the appropriate position.
+ (As general and parameter entities are used in different contexts
+ (document vs. DTD), both kinds of entities can use the same brace
+ Begin_entity...End_entity.)
+ + TODO:
+ The constraints for internal parameter entities are not yet checked.
+
+ - Recursive references can be detected because entities must be opened
+ before the 'next_token' method can be invoked.
+
+----------------------------------------------------------------------
+The lexer layer
+----------------------------------------------------------------------
+
+There are five main lexers, and a number of auxiliary lexers. The five
+main lexers are:
+
+- Document (function scan_document):
+ Scans an XML document outside the DTD and outside the element instance.
+
+- Content (function scan_content):
+ Scans an element instance, but not within tags.
+
+- Within_tag (function scan_within_tag):
+ Scans within <...>, i.e. a tag denoting an element instance.
+
+- Document_type (function scan_document_type):
+ Scans after "<!DOCTYPE" until the corresponding ">".
+
+- Declaration (function scan_declaration):
+ Scans sequences of declarations
+
+Why several lexers? Because there are different lexical rules in these
+five regions of an XML document.
+
+Every lexer not only produces tokens, but also the name of the next lexer
+to use. For example, if the Document lexer scans "
+ ]>
+ ∅
+ - This is illegal, and the presence of an empty Begin_entity/End_entity pair
+ helps to recognize this.
diff --git a/helm/DEVEL/pxp/pxp/doc/dist-common.xml b/helm/DEVEL/pxp/pxp/doc/dist-common.xml
new file mode 100644
index 000000000..d18a1500f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/dist-common.xml
@@ -0,0 +1,123 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Gerd Stolpmann'>
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/Makefile b/helm/DEVEL/pxp/pxp/doc/manual/Makefile
new file mode 100644
index 000000000..5a3e1ffab
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/Makefile
@@ -0,0 +1,82 @@
+DOCBOOK_HTML = /usr/share/sgml/docbkdsl/html
+DOCBOOK_PRINT = /usr/share/sgml/docbkdsl/print
+SRC = $(PWD)/src
+
+.PHONY: html ps
+
+default: html ps
+
+html: html/book1.htm html/pic/done
+
+ps: ps/markup.ps ps/pic/done
+
+
+src/readme.ent: ../../examples/readme/to_html.ml
+ src/getcode.ml <../../examples/readme/to_html.ml >src/readme.ent
+
+src/yacc.mli.ent: ../../pxp_yacc.mli
+ src/getcode.ml <../../pxp_yacc.mli >src/yacc.mli.ent
+
+src/dtd.mli.ent: ../../pxp_dtd.mli
+ src/getcode.ml <../../pxp_dtd.mli >src/dtd.mli.ent
+
+html/book1.htm: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+ mkdir -p html
+ cp src/markup.css html; \
+ cd html; \
+ rm -f *.htm*; \
+ jade -t sgml -D$(DOCBOOK_HTML) -D$(SRC) -ihtml markup.sgml; \
+ true
+ touch html/TIMESTAMP
+
+html/pic/done: src/pic/*.fig
+ mkdir -p html/pic
+ l=`cd src/pic; echo *.fig`; \
+ for x in $$l; do fig2dev -L gif src/pic/$$x html/pic/`basename $$x .fig`.gif; done
+ touch html/pic/done
+
+#man: src/findlib_reference.xml
+# mkdir -p man
+# cd man; \
+# rm -f *.[0-9]; \
+# db2man <../src/findlib_reference.xml
+
+ps/markup.tex: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+ mkdir -p ps
+ cd ps; \
+ jade -t tex -D$(DOCBOOK_PRINT) -D$(SRC) markup.sgml; \
+ true
+
+ps/markup.dvi: ps/markup.tex ps/pic/done
+ cd ps; \
+ jadetex markup.tex; \
+ jadetex markup.tex; \
+ jadetex markup.tex
+
+ps/markup.ps: ps/markup.dvi
+ cd ps; \
+ dvips -f <markup.dvi >markup.ps
+
+ps/pic/done: src/pic/*.fig
+ mkdir -p ps/pic
+ l=`cd src/pic; echo *.fig`; \
+ for x in $$l; do fig2dev -L ps -m 0.8 src/pic/$$x ps/pic/`basename $$x .fig`.ps; done
+ touch ps/pic/done
+
+.SUFFIXES: .xml .sgml
+
+.sgml.xml:
+ sx -xndata $< >$@; true
+
+
+
+clean:
+ rm -rf html man ps
+ rm -f src/readme.ent
+
+CLEAN: clean
+
+distclean:
+ rm -f src/*~
+ rm -f *~
+ rm -f ps/*.aux ps/*.dvi ps/*.log ps/*.tex
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP b/helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html
new file mode 100644
index 000000000..ab88e87bf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html
@@ -0,0 +1,434 @@
+Configuring and calling the parser There are the following main functions invoking the parser (in Pxp_yacc):
+
+
parse_document_entity: You want to
+parse a complete and closed document consisting of a DTD and the document body;
+the body is validated against the DTD. This mode is interesting if you have a
+file
+
+
<!DOCTYPE root ... [ ... ] > <root> ... </root>
+
+and you can accept any DTD that is included in the file (e.g. because the file
+is under your control).parse_wfdocument_entity: You want to
+parse a complete and closed document consisting of a DTD and the document body;
+but the body is not validated, only checked for well-formedness. This mode is
+preferred if validation costs too much time or if the DTD is missing.
parse_dtd_entity: You want only to
+parse an entity (file) containing the external subset of a DTD. Sometimes it is
+interesting to read such a DTD, for example to compare it with the DTD included
+in a document, or to apply the next mode:
parse_content_entity: You want only to
+parse an entity (file) containing a fragment of a document body; this fragment
+is validated against the DTD you pass to the function. Especially, the fragment
+must not have a <!DOCTYPE> clause, and must directly
+begin with an element. The element is validated against the DTD. This mode is
+interesting if you want to check documents against a fixed, immutable DTD.
parse_wfcontent_entity: This function
+also parses a single element without DTD, but does not validate it.
extract_dtd_from_document_entity: This
+function extracts the DTD from a closed document consisting of a DTD and a
+document body. Both the internal and the external subsets are extracted.
In many cases, parse_document_entity is the preferred mode
+to parse a document in a validating way, and
+parse_wfdocument_entity is the mode of choice to parse a
+file while only checking for well-formedness.
There are a number of variations of these modes. One important application of a
+parser is to check documents of an untrusted source against a fixed DTD. One
+solution is to not allow the <!DOCTYPE> clause in
+these documents, and treat the document like a fragment (using mode
+parse_content_entity ). This is very simple, but
+inflexible; users of such a system cannot even define additional entities to
+abbreviate frequent phrases of their text.
It may be necessary to have a more intelligent checker. For example, it is also
+possible to parse the document to check fully, i.e. with DTD, and to compare
+this DTD with the prescribed one. In order to fully parse the document, mode
+parse_document_entity is applied, and to get the DTD to
+compare with mode parse_dtd_entity can be used.
There is another very important configurable aspect of the parser: the
+so-called resolver. The task of the resolver is to locate the contents of an
+(external) entity for a given entity name, and to make the contents accessible
+as a character stream. (Furthermore, it also normalizes the character set;
+but this is a detail we can ignore here.) Consider you have a file called
+"main.xml" containing
+
+
<!ENTITY % sub SYSTEM "sub/sub.xml">
+%sub;
+
+and a file stored in the subdirectory
"sub" with name
+
"sub.xml" containing
+
+
<!ENTITY % subsub SYSTEM "subsub/subsub.xml">
+%subsub;
+
+and a file stored in the subdirectory
"subsub" of
+
"sub" with name
"subsub.xml" (the
+contents of this file do not matter). Here, the resolver must track that
+the second entity
subsub is located in the directory
+
"sub/subsub" , i.e. the difficulty is to interpret the
+system (file) names of entities relative to the entities containing them,
+even if the entities are deeply nested.
There is not a fixed resolver already doing everything right - resolving entity
+names is a task that highly depends on the environment. The XML specification
+only demands that SYSTEM entities are interpreted like URLs
+(which is not very precise, as there are lots of URL schemes in use), hoping
+that this helps overcoming the local peculiarities of the environment; the idea
+is that if you do not know your environment you can refer to other entities by
+denoting URLs for them. I think that this interpretation of
+SYSTEM names may have some applications in the internet, but
+it is not the first choice in general. Because of this, the resolver is a
+separate module of the parser that can be exchanged by another one if
+necessary; more precisely, the parser already defines several resolvers.
The following resolvers do already exist:
+
+
Resolvers reading from arbitrary input channels. These
+can be configured such that a certain ID is associated with the channel; in
+this case inner references to external entities can be resolved. There is also
+a special resolver that interprets SYSTEM IDs as URLs; this resolver can
+process relative SYSTEM names and determine the corresponding absolute URL.
A resolver that reads always from a given O'Caml
+string. This resolver is not able to resolve further names if the string is
+not associated with any name, i.e. if the document contained in the string
+refers to an external entity, this reference cannot be followed in this
+case.
A resolver for file names. The SYSTEM
+name is interpreted as file URL with the slash "/" as separator for
+directories. - This resolver is derived from the generic URL resolver.
+
+The interface a resolver must have is documented, so it is possible to write
+your own resolver. For example, you could connect the parser with an HTTP
+client, and resolve URLs of the HTTP namespace. The resolver classes support
+that several independent resolvers are combined to one more powerful resolver;
+thus it is possible to combine a self-written resolver with the already
+existing resolvers.
Note that the existing resolvers only interpret SYSTEM
+names, not PUBLIC names. If it helps you, it is possible to
+define resolvers for PUBLIC names, too; for example, such a
+resolver could look up the public name in a hash table, and map it to a system
+name which is passed over to the existing resolver for system names. It is
+relatively simple to provide such a resolver.
Prev Home Next Details of the mapping from XML text to the tree representation Up Resolvers and sources
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html
new file mode 100644
index 000000000..d74ecbbca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html
@@ -0,0 +1,533 @@
+What is XML? XML (short for Extensible Markup Language )
+generalizes the idea that text documents are typically structured in sections,
+sub-sections, paragraphs, and so on. The format of the document is not fixed
+(as, for example, in HTML), but can be declared by a so-called DTD (document
+type definition). The DTD describes only the rules how the document can be
+structured, but not how the document can be processed. For example, if you want
+to publish a book that uses XML markup, you will need a processor that converts
+the XML file into a printable format such as Postscript. On the one hand, the
+structure of XML documents is configurable; on the other hand, there is no
+longer a canonical interpretation of the elements of the document; for example
+one XML DTD might require that paragraphs are delimited by
+para tags, and another DTD expects p tags
+for the same purpose. As a result, for every DTD a new processor is required.
Although XML can be used to express structured text documents it is not limited
+to this kind of application. For example, XML can also be used to exchange
+structured data over a network, or to simply store structured data in
+files. Note that XML documents cannot contain arbitrary binary data because
+some characters are forbidden; for some applications you need to encode binary
+data as text (e.g. the base 64 encoding).
The following example shows a very simple DTD, and a corresponding document
+instance. The document is structured such that it consists of sections, and
+that sections consist of paragraphs, and that paragraphs contain plain text:
<!ELEMENT document (section)+>
+<!ELEMENT section (paragraph)+>
+<!ELEMENT paragraph (#PCDATA)> The following document is an instance of this DTD:
<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE document SYSTEM "simple.dtd">
+<document>
+ <section>
+ <paragraph>This is a paragraph of the first section.</paragraph>
+ <paragraph>This is another paragraph of the first section.</paragraph>
+ </section>
+ <section>
+ <paragraph>This is the only paragraph of the second section.</paragraph>
+ </section>
+</document> As in HTML (and, of course, in grand-father SGML), the "pieces" of
+the document are delimited by element braces, i.e. such a piece begins with
+<name-of-the-type-of-the-piece> and ends with
+</name-of-the-type-of-the-piece> , and the pieces are
+called elements . Unlike HTML and SGML, both start tags and
+end tags (i.e. the delimiters written in angle brackets) can never be left
+out. For example, HTML calls the paragraphs simply p , and
+because paragraphs never contain paragraphs, a sequence of several paragraphs
+can be written as:
+
+
<p>First paragraph
+<p>Second paragraph
+
+This is not possible in XML; continuing our example above we must always write
+
+
<paragraph>First paragraph</paragraph>
+<paragraph>Second paragraph</paragraph>
+
+The rationale behind that is to (1) simplify the development of XML parsers
+(you need not convert the DTD into a deterministic finite automaton which is
+required to detect omitted tags), and to (2) make it possible to parse the
+document independent of whether the DTD is known or not.
The first line of our sample document,
+
+
<?xml version="1.0" encoding="ISO-8859-1"?>
+
+is the so-called
XML declaration . It expresses that the
+document follows the conventions of XML version 1.0, and that the document is
+encoded using characters from the ISO-8859-1 character set (often known as
+"Latin 1", mostly used in Western Europe). Although the XML declaration is not
+mandatory, it is good style to include it; everybody sees at the first glance
+that the document uses XML markup and not the similar-looking HTML and SGML
+markup languages. If you omit the XML declaration, the parser will assume
+that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
+it possible to distinguish between UTF-8 and UTF-16 automatically); these
+are encodings of Unicode's universal character set. (Note that
PXP , unlike its
+predecessor "Markup", fully supports Unicode.)
The second line,
+
+
<!DOCTYPE document SYSTEM "simple.dtd">
+
+names the DTD that is going to be used for the rest of the document. In
+general, it is possible that the DTD consists of two parts, the so-called
+external and the internal subset. "External" means that the DTD exists as a
+second file; "internal" means that the DTD is included in the same file. In
+this example, there is only an external subset, and the system identifier
+"simple.dtd" specifies where the DTD file can be found. System identifiers are
+interpreted as URLs; for instance this would be legal:
+
+
<!DOCTYPE document SYSTEM "http://host/location/simple.dtd">
+
+Please note that
PXP cannot interpret HTTP identifiers by default, but it is
+possible to change the interpretation of system identifiers.
The word immediately following DOCTYPE determines which of
+the declared element types (here "document", "section", and "paragraph") is
+used for the outermost element, the root element . In this
+example it is document because the outermost element is
+delimited by <document> and
+</document> .
The DTD consists of three declarations for element types:
+document , section , and
+paragraph . Such a declaration has two parts:
+
+
<!ELEMENT name content-model >
+
+The content model is a regular expression which describes the possible inner
+structure of the element. Here,
document contains one or
+more sections, and a
section contains one or more
+paragraphs. Note that these two element types are not allowed to contain
+arbitrary text. Only the
paragraph element type is declared
+such that parsed character data (indicated by the symbol
+
#PCDATA ) is permitted.
See below for a detailed discussion of content models.
XML documents are human-readable, but this is not the main purpose of this
+language. XML has been designed such that documents can be read by a program
+called an XML parser . The parser checks that the document
+is well-formatted, and it represents the document as objects of the programming
+language. There are two aspects when checking the document: First, the document
+must follow some basic syntactic rules, such as that tags are written in angle
+brackets, that for every start tag there must be a corresponding end tag and so
+on. A document respecting these rules is
+well-formed . Second, the document must match the DTD in
+which case the document is valid . Many parsers check only
+on well-formedness and ignore the DTD; PXP is designed such that it can
+even validate the document.
A parser does not make a sensible application, it only reads XML
+documents. The whole application working with XML-formatted data is called an
+XML processor . Often XML processors convert documents into
+another format, such as HTML or Postscript. Sometimes processors extract data
+of the documents and output the processed data again XML-formatted. The parser
+can help the application processing the document; for example it can provide
+means to access the document in a specific manner. PXP supports an
+object-oriented access layer specially.
As we have seen, there are two levels of description: On the one hand, XML can
+define rules about the format of a document (the DTD), on the other hand, XML
+expresses structured documents. There are a number of possible applications:
XML can be used to express structured texts. Unlike HTML, there is no canonical
+interpretation; one would have to write a backend for the DTD that translates
+the structured texts into a format that existing browsers, printers
+etc. understand. The advantage of a self-defined document format is that it is
+possible to design the format in a more problem-oriented way. For example, if
+the task is to extract reports from a database, one can use a DTD that reflects
+the structure of the report or the database. A possible approach would be to
+have an element type for every database table and for every column. Once the
+DTD has been designed, the report procedure can be split up into a part that
+selects the database rows and outputs them as an XML document according to the
+DTD, and into a part that translates the document into other formats. Of course,
+the latter part can be solved in a generic way, e.g. there may be configurable
+backends for all DTDs that follow the approach and have element types for
+tables and columns.
XML plays the role of a configurable intermediate format. The database
+extraction function can be written without having to know the details of
+typesetting; the backends can be written without having to know the details of
+the database.
Of course, there are traditional solutions. One can define an ad hoc
+intermediate text file format. The disadvantage is that there are no names for
+the pieces of the format, and that such formats usually lack documentation
+because of this. Another solution would be to have a binary representation,
+either as language-dependent or language-independent structure (example of the
+latter can be found in RPC implementations). The disadvantage is that it is
+harder to view such representations, one has to write pretty printers for this
+purpose. It is also more difficult to enter test data; XML is plain text that
+can be written using an arbitrary editor (Emacs has even a good XML mode,
+PSGML). All these alternatives suffer from a missing structure checker,
+i.e. the programs processing these formats usually do not check the input file
+or input object in detail; XML parsers check the syntax of the input (the
+so-called well-formedness check), and the advanced parsers like PXP even
+verify that the structure matches the DTD (the so-called validation).
XML can be used as configurable communication language. A fundamental problem
+of every communication is that sender and receiver must follow the same
+conventions about the language. For data exchange, the question is usually
+which data records and fields are available, how they are syntactically
+composed, and which values are possible for the various fields. Similar
+questions arise for text document exchange. XML does not answer these problems
+completely, but it reduces the number of ambiguities for such conventions: The
+outlines of the syntax are specified by the DTD (but not necessarily the
+details), and XML introduces canonical names for the components of documents
+such that it is simpler to describe the rest of the syntax and the semantics
+informally.
XML is a data storage format. Currently, every software product tends to use
+its own way to store data; commercial software often does not describe such
+formats, and it is a pain to integrate such software into a bigger project.
+XML can help to improve this situation when several applications share the same
+syntax of data files. DTDs are then neutral instances that check the format of
+data files independent of applications.
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html
new file mode 100644
index 000000000..c58e6ff3e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html
@@ -0,0 +1,234 @@
+Using PXP The parser can be used to validate a document. This means
+that all the constraints that must hold for a valid document are actually
+checked. Validation is the default mode of PXP , i.e. every document is
+validated while it is being parsed.
In the examples directory of the distribution you find the
+pxpvalidate application. It is invoked in the following way:
+
+
pxpvalidate [ -wf ] file ...
+
+The files mentioned on the command line are validated, and every warning and
+every error message is printed to stderr.
The -wf switch modifies the behaviour such that a well-formedness parser is
+simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
+DTD are ignored, and only the ENTITY declarations will take effect. This mode
+is intended for documents lacking a DTD. Please note that the parser still
+scans the DTD fully and will report all errors in the DTD; such checks are not
+required by a well-formedness parser.
The pxpvalidate application is the simplest sensible program
+using PXP , you may consider it as "hello world" program.
Prev Home Next A complete example: The readme DTD Up How to parse a document from an application
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html
new file mode 100644
index 000000000..0e564fb20
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html
@@ -0,0 +1,349 @@
+The objects representing the document This description might be out-of-date. See the module interface files
+for updated information.
class [ 'ext ] document :
+ Pxp_types.collect_warnings ->
+ object
+ method init_xml_version : string -> unit
+ method init_root : 'ext node -> unit
+
+ method xml_version : string
+ method xml_standalone : bool
+ method dtd : dtd
+ method root : 'ext node
+
+ method encoding : Pxp_types.rep_encoding
+
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ end
+;;
+
+The methods beginning with
init_ are only for internal use
+of the parser.
xml_version : returns the version string at the beginning of
+the document. For example, "1.0" is returned if the document begins with
+<?xml version="1.0"?> .
xml_standalone : returns the boolean value of
+standalone declaration in the XML declaration. If the
+standalone attribute is missing, false is
+returned.
dtd : returns a reference to the global DTD object.
root : returns a reference to the root element.
encoding : returns the internal encoding of the
+document. This means that all strings of which the document consists are
+encoded in this character set.
pinstr : returns the processing instructions outside the DTD
+and outside the root element. The argument passed to the method names a
+target , and the method returns all instructions with this
+target. The target is the first word inside <? and
+?> .
pinstr_names : returns the names of the processing instructions
add_pinstr : adds another processing instruction. This method
+is used by the parser itself to enter the instructions returned by
+pinstr , but you can also enter additional instructions.
write : writes the document to the passed stream as XML
+text using the passed (external) encoding. The generated text is always valid
+XML and can be parsed by PXP; however, the text is badly formatted (this is not
+a pretty printer).
Prev Home Next Example: An HTML backend for the readme
+DTD Up The class type node
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/index.html b/helm/DEVEL/pxp/pxp/doc/manual/html/index.html
new file mode 100644
index 000000000..3c07ff28f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/index.html
@@ -0,0 +1,330 @@
+The PXP user's guide Gerd Stolpmann Copyright © 1999, 2000 by Gerd Stolpmann
PXP is a validating parser for XML-1.0 which has been
+written entirely in Objective Caml.
License
This document, and the described software, "PXP ", are copyright by
+Gerd Stolpmann.
Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "PXP " software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css b/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css
new file mode 100644
index 000000000..67dfaecb7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css
@@ -0,0 +1,4 @@
+.acronym {
+ font-weight: bold;
+ color: #c71585
+}
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html b/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html
new file mode 100644
index 000000000..9db427d34
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html
@@ -0,0 +1,167 @@
+User's guide
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/done b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/done
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif
new file mode 100644
index 000000000..6cc260a4e
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif
new file mode 100644
index 000000000..0091db2a2
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif
new file mode 100644
index 000000000..97cd3639e
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif
new file mode 100644
index 000000000..d521123a7
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif
new file mode 100644
index 000000000..5f6358cc3
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif
new file mode 100644
index 000000000..5644c91f3
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html
new file mode 100644
index 000000000..102aba218
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html
@@ -0,0 +1,1694 @@
+Highlights of XML The PXP user's guide Prev Chapter 1. What is XML? Next
This section explains many of the features of XML, but not all, and some
+features not in detail. For a complete description, see the XML
+specification .
The DTD contains various declarations; in general you can only use a feature if
+you have previously declared it. The document instance file may contain the
+full DTD, but it is also possible to split the DTD into an internal and an
+external subset. A document must begin as follows if the full DTD is included:
+
+
<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root [
+ Declarations
+]>
+
+These declarations are called the
internal subset . Note
+that the usage of entities and conditional sections is restricted within the
+internal subset.
If the declarations are located in a different file, you can refer to this file
+as follows:
+
+
<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root SYSTEM "file name ">
+
+The declarations in the file are called the
external
+subset . The file name is called the
system
+identifier .
+It is also possible to refer to the file by a so-called
+
public identifier , but most XML applications won't use
+this feature.
You can also specify both internal and external subsets. In this case, the
+declarations of both subsets are mixed, and if there are conflicts, the
+declaration of the internal subset overrides those of the external subset with
+the same name. This looks as follows:
+
+
<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root SYSTEM "file name " [
+ Declarations
+]> The XML declaration (the string beginning with <?xml and
+ending at ?> ) should specify the encoding of the
+file. Common values are UTF-8, and the ISO-8859 series of character sets. Note
+that every file parsed by the XML processor can begin with an XML declaration
+and that every file may have its own encoding.
The name of the root element must be mentioned directly after the
+DOCTYPE string. This means that a full document instance
+looks like
+
+
<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root SYSTEM "file name " [
+ Declarations
+]>
+
+<root >
+ inner contents
+</root > Some characters are generally reserved to indicate markup such that they cannot
+be used for character data. These characters are <, >, and
+&. Furthermore, single and double quotes are sometimes reserved. If you
+want to include such a character as character, write it as follows:
+
+
&lt; instead of <
&gt; instead of >
&amp; instead of &
&apos; instead of '
&quot; instead of "
+
+All other characters are free in the document instance. It is possible to
+include a character by its position in the Unicode alphabet:
+
+
&#n ;
+
+where
n is the decimal number of the
+character. Alternatively, you can specify the character by its hexadecimal
+number:
+
+
&#xn ;
+
+In the scope of declarations, the character % is no longer free. To include it
+as character, you must use the notations
&#37; or
+
&#x25; .
Note that besides &lt;, &gt;, &amp;,
+&apos;, and &quot; there are no predefined character entities. This is
+different from HTML which defines a list of characters that can be referenced
+by name (e.g. &auml; for ä); however, if you prefer named characters, you
+can declare such entities yourself (see below).
Elements structure the document instance in a hierarchical way. There is a
+top-level element, the root element , which contains a
+sequence of inner elements and character sections. The inner elements are
+structured in the same way. Every element has an element
+type . The beginning of the element is indicated by a start
+tag , written
+
+
<element-type >
+
+and the element continues until the corresponding
end tag
+is reached:
+
+
</element-type >
+
+In XML, it is not allowed to omit start or end tags, even if the DTD would
+permit this. Note that there are no special rules how to interpret spaces or
+newlines near start or end tags; all spaces and newlines count.
Every element type must be declared before it can be used. The declaration
+consists of two parts: the ELEMENT declaration describes the content model,
+i.e. which inner elements are allowed; the ATTLIST declaration describes the
+attributes of the element.
An element can simply allow everything as content. This is written:
+
+
<!ELEMENT name ANY>
+
+On the opposite, an element can be forced to be empty; declared by:
+
+
<!ELEMENT name EMPTY>
+
+Note that there is an abbreviated notation for empty element instances:
+
<name /> .
There are two more sophisticated forms of declarations: so-called
+mixed declarations , and regular
+expressions . An element with mixed content contains character data
+interspersed with inner elements, and the set of allowed inner elements can be
+specified. In contrast to this, a regular expression declaration does not allow
+character data, but the inner elements can be described by the more powerful
+means of regular expressions.
A declaration for mixed content looks as follows:
+
+
<!ELEMENT name (#PCDATA | element1 | ... | elementn )*>
+
+or if you do not want to allow any inner element, simply
+
+
<!ELEMENT name (#PCDATA)> Example
If element type q is declared as
+
+
<!ELEMENT q (#PCDATA | r | s)*>
+
+this is a legal instance:
+
+<q>This is character data<r></r>with <s></s>inner elements</q>
+
+But this is illegal because t has not been enumerated in the
+declaration:
+
+<q>This is character data<r></r>with <t></t>inner elements</q> The other form uses a regular expression to describe the possible contents:
+
+
<!ELEMENT name regexp >
+
+The following well-known regexp operators are allowed:
+
+
element-name
(subexpr1 , ... , subexprn )
(subexpr1 | ... | subexprn )
subexpr *
subexpr +
subexpr ?
+
+The
, operator indicates a sequence of sub-models, the
+
| operator describes alternative sub-models. The
+
* indicates zero or more repetitions, and
+
+ one or more repetitions. Finally,
? can
+be used for optional sub-models. As atoms the regexp can contain names of
+elements; note that it is not allowed to include
#PCDATA .
The exact syntax of the regular expressions is rather strange. This can be
+explained best by a list of constraints:
+
+
The outermost expression must not be
+element-name .
Illegal:
+<!ELEMENT x y> ; this must be written as
+<!ELEMENT x (y)> .
For the unary operators subexpr * ,
+subexpr + , and
+subexpr ? , the
+subexpr must not be again an
+unary operator.
Illegal:
+<!ELEMENT x y**> ; this must be written as
+<!ELEMENT x (y*)*> .
Between ) and one of the unary operators
+* , + , or ? , there must
+not be whitespace.
Illegal:
+<!ELEMENT x (y|z) *> ; this must be written as
+<!ELEMENT x (y|z)*> .
There is the additional constraint that the
+right parenthesis must be contained in the same entity as the left parenthesis;
+see the section about parsed entities below.
Note that there is another restriction on regular expressions which must be
+deterministic. This means that the parser must be able to see by looking at the
+next token which alternative is actually used, or whether the repetition
+stops. The reason for this is simply compatibility with SGML (there is no
+intrinsic reason for this rule; XML can live without this restriction).
Example
The elements are declared as follows:
+
+
<!ELEMENT q (r?, (s | t)+)>
+<!ELEMENT r (#PCDATA)>
+<!ELEMENT s EMPTY>
+<!ELEMENT t (q | r)>
+
+This is a legal instance:
+
+<q><r>Some characters</r><s/></q>
+
+(Note: <s/> is an abbreviation for
+<s></s> .)
+
+It would be illegal to leave <s/> out because at
+least one instance of s or t must be
+present. It would be illegal, too, if characters existed outside the
+r element; the only exception is white space. -- This is
+legal, too:
+
+<q><s/><t><q><s/></q></t></q> Elements may have attributes. These are put into the start tag of an element as
+follows:
+
+
<element-name attribute1 ="value1 " ... attributen ="valuen ">
+
+Instead of
+
"valuek "
+it is also possible to use single quotes as in
+
'valuek ' .
+Note that you cannot use double quotes literally within the value of the
+attribute if double quotes are the delimiters; the same applies to single
+quotes. You can generally not use < and & as characters in attribute
+values. It is possible to include the paraphrases &lt;, &gt;,
+&amp;, &apos;, and &quot; (and any other reference to a general
+entity as long as the entity is not defined by an external file) as well as
+&#
n ;.
Before you can use an attribute you must declare it. An ATTLIST declaration
+looks as follows:
+
+
<!ATTLIST element-name
+ attribute-name attribute-type attribute-default
+ ...
+ attribute-name attribute-type attribute-default
+>
+
+There are a lot of types, but most important are:
+
+
CDATA : Every string is allowed as attribute value.
NMTOKEN : Every nametoken is allowed as attribute
+value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
+order.
NMTOKENS : A space-separated list of nametokens is allowed as
+attribute value.
+
+The most interesting default declarations are:
+
+
#REQUIRED : The attribute must be specified.
#IMPLIED : The attribute can be specified but also can be
+left out. The application can find out whether the attribute was present or
+not.
"value " or
+'value ' : This particular value is
+used as default if the attribute is omitted in the element.
Example
This is a valid attribute declaration for element type r :
+
+
<!ATTLIST r
+ x CDATA #REQUIRED
+ y NMTOKEN #IMPLIED
+ z NMTOKENS "one two three">
+
+This means that x is a required attribute that cannot be
+left out, while y and z are optional. The
+XML parser indicates to the application whether y is present or
+not, but if z is missing the default value
+"one two three" is returned automatically. This is a valid example of these attributes:
+
+
<r x="He said: &quot;I don't like quotes!&quot;" y='1'> Elements describe the logical structure of the document, while
+entities determine the physical structure. Entities are
+the pieces of text the parser operates on, mostly files and macros. Entities
+may be parsed in which case the parser reads the text and
+interprets it as XML markup, or unparsed which simply
+means that the data of the entity has a foreign format (e.g. a GIF icon).
If the parsed entity is going to be used as part of the DTD, it
+is called a parameter entity . You can declare a parameter
+entity with a fixed text as content by:
+
+
<!ENTITY % name "value ">
+
+Within the DTD, you can
refer to this entity, i.e. read
+the text of the entity, by:
+
+
%name ;
+
+Such entities behave like macros, i.e. when they are referred to, the
+macro text is inserted and read instead of the original text.
+
+
Example
For example, you can declare two elements with the same content model by:
+
+
<!ENTITY % model "a | b | c">
+<!ELEMENT x (%model;)>
+<!ELEMENT y (%model;)>
+
+If the contents of the entity are given as string constant, the entity is
+called an
internal entity. It is also possible to name a
+file to be used as content (an
external entity):
+
+
<!ENTITY % name SYSTEM "file name ">
+
+There are some restrictions for parameter entities:
+
+
If the internal parameter entity contains the first token of a declaration
+(i.e. <! ), it must also contain the last token of the
+declaration, i.e. the > . This means that the entity
+either contains a whole number of complete declarations, or some text from the
+middle of one declaration.
Illegal:
+
<!ENTITY % e "(a | b | c)>">
+<!ELEMENT x %e; Because <! is contained in the main
+entity, and the corresponding > is contained in the
+entity e . If the internal parameter entity contains a left parenthesis, it must also
+contain the corresponding right parenthesis.
Illegal:
+
<!ENTITY % e "(a | b | c">
+<!ELEMENT x %e;)> Because ( is contained in the entity
+e , and the corresponding ) is
+contained in the main entity.When reading text from an entity, the parser automatically inserts one space
+character before the entity text and one space character after the entity
+text. However, this rule is not applied within the definition of another
+entity.
Legal:
+
+<!ENTITY % suffix "gif">
+<!ENTITY iconfile 'icon.%suffix;'> Because %suffix; is referenced within
+the definition text for iconfile , no additional spaces are
+added.Illegal:
+
<!ENTITY % suffix "test">
+<!ELEMENT x.%suffix; ANY>
+Because %suffix; is referenced outside the definition
+text of another entity, the parser replaces %suffix; by
+space testspace . Illegal:
+
<!ENTITY % e "(a | b | c)">
+<!ELEMENT x %e;*> Because there is a whitespace between )
+and * , which is illegal.An external parameter entity must always consist of a whole number of complete
+declarations.
In the internal subset of the DTD, a reference to a parameter entity (internal
+or external) is only allowed at positions where a new declaration can start.
If the parsed entity is going to be used in the document instance, it is called
+a general entity . Such entities can be used as
+abbreviations for frequent phrases, or to include external files. Internal
+general entities are declared as follows:
+
+
<!ENTITY name "value ">
+
+External general entities are declared this way:
+
+
<!ENTITY name SYSTEM "file name ">
+
+References to general entities are written as:
+
+
&name ;
+
+The main difference between parameter and general entities is that the former
+are only recognized in the DTD and that the latter are only recognized in the
+document instance. As the DTD is parsed before the document, the parameter
+entities are expanded first; for example it is possible to use the content of a
+parameter entity as the name of a general entity:
+
&%name;; [1] .
General entities must respect the element hierarchy. This means that there must
+be an end tag for every start tag in the entity value, and that end tags
+without corresponding start tags are not allowed.
Example
If the author of a document changes sometimes, it is worthwhile to set up a
+general entity containing the names of the authors. If the author changes, you
+need only to change the definition of the entity, and do not need to check all
+occurrences of authors' names:
+
+
<!ENTITY authors "Gerd Stolpmann">
+
+In the document text, you can now refer to the author names by writing
+&authors; .Illegal:
+The following two entities are illegal because the elements in the definition
+do not nest properly:
+
+
<!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
+<!ENTITY nonsense "<a></b>"> Earlier in this introduction we explained that there are substitutes for
+reserved characters: &lt;, &gt;, &amp;, &apos;, and
+&quot;. These are simply predefined general entities; note that they are
+the only predefined entities. It is allowed to define these entities again
+as long as the meaning is unchanged.
Unparsed entities have a foreign format and can thus not be read by the XML
+parser. Unparsed entities are always external. The format of an unparsed entity
+must have been declared, such a format is called a
+notation . The entity can then be declared by referring to
+this notation. As unparsed entities do not contain XML text, it is not possible
+to include them directly into the document; you can only declare attributes
+such that names of unparsed entities are acceptable values.
As you can see, unparsed entities are too complicated in order to have any
+purpose. It is almost always better to simply pass the name of the data file as
+normal attribute value, and let the application recognize and process the
+foreign format.
Notes [1] This construct is only
+allowed within the definition of another entity; otherwise extra spaces would
+be added (as explained above). Such indirection is not recommended.
Complete example:
+
<!ENTITY % variant "a"> <!-- or "b" -->
+<!ENTITY text-a "This is text A.">
+<!ENTITY text-b "This is text B.">
+<!ENTITY text "&text-%variant;;">
+You can now write &text; in the document instance, and
+depending on the value of variant either
+text-a or text-b is inserted.
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html
new file mode 100644
index 000000000..267730574
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html
@@ -0,0 +1,464 @@
+The class type extension The PXP user's guide Prev Chapter 3. The objects representing the document Next
class type [ 'node ] extension =
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what previously has been set by "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+
+This is the type of classes used for node extensions. For every node of the
+document tree, there is not only the
node object, but also
+an
extension object. The latter has minimal
+functionality; it has only the necessary methods to be attached to the node
+object containing the details of the node instance. The extension object is
+called extension because its purpose is extensibility.
For some reasons, it is impossible to derive the
+node classes (i.e. element_impl and
+data_impl ) such that the subclasses can be extended by
+new methods. But
+subclassing nodes is a great feature, because it allows the user to provide
+different classes for different types of nodes. The extension objects are a
+workaround that is as powerful as direct subclassing, the costs are
+some notation overhead.
Figure 3-6. The structure of nodes and extensions
The picture shows how the nodes and extensions are linked
+together. Every node has a reference to its extension, and every extension has
+a reference to its node. The methods extension and
+node follow these references; a typical phrase is
+
+
self # node # attribute "xy"
+
+to get the value of an attribute from a method defined in the extension object;
+or
+
+
self # node # iter
+ (fun n -> n # extension # my_method ...)
+
+to iterate over the subnodes and to call
my_method of the
+corresponding extension objects.
Note that extension objects do not have references to subnodes
+(or "subextensions") themselves; in order to get one of the children of an
+extension you must first go to the node object, then get the child node, and
+finally reach the extension that is logically the child of the extension you
+started with.
At minimum, you must define the methods
+clone , node , and
+set_node such that your class is compatible with the type
+extension . The method set_node is called
+during the initialization of the node, or after a node has been cloned; the
+node object invokes set_node on the extension object to tell
+it that this node is now the object the extension is linked to. The extension
+must return the node object passed as argument of set_node
+when the node method is called.
The clone method must return a copy of the
+extension object; at least the object itself must be duplicated, but if
+required, the copy should deeply duplicate all objects and values that are
+referred by the extension, too. Whether this is required, depends on the
+application; clone is invoked by the node object when one of
+its cloning methods is called.
A good starting point for an extension class:
+
+
class custom_extension =
+ object (self)
+
+ val mutable node = (None : custom_extension node option)
+
+ method clone = {< >}
+
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+
+ method set_node n =
+ node <- Some n
+
+ end
+
+This class is compatible with
extension . The purpose of
+defining such a class is, of course, adding further methods; and you can do it
+without restriction.
Often, you want not only one extension class. In this case,
+it is the simplest way that all your classes (for one kind of document) have
+the same type (with respect to the interface; i.e. it does not matter if your
+classes differ in the defined private methods and instance variables, but
+public methods count). This approach avoids lots of coercions and problems with
+type incompatibilities. It is simple to implement:
+
+
class virtual custom_extension =
+ object (self)
+ val mutable node = (None : custom_extension node option)
+
+ method clone = ... (* see above *)
+ method node = ... (* see above *)
+ method set_node n = ... (* see above *)
+
+ method virtual my_method1 : ...
+ method virtual my_method2 : ...
+ ... (* etc. *)
+ end
+
+class custom_extension_kind_A =
+ object (self)
+ inherit custom_extension
+
+ method my_method1 = ...
+ method my_method2 = ...
+ end
+
+class custom_extension_kind_B =
+ object (self)
+ inherit custom_extension
+
+ method my_method1 = ...
+ method my_method2 = ...
+ end
+
+If a class does not need a method (e.g. because it does not make sense, or it
+would violate some important condition), it is possible to define the method
+and to always raise an exception when the method is invoked
+(e.g.
assert false ).
The latter is a strong recommendation: do not try to further
+specialize the types of extension objects. It is difficult, sometimes even
+impossible, and almost never worth-while.
Once you have defined your extension classes, you can bind them
+to element types. The simplest case is that you have only one class and that
+this class is to be always used. The parsing functions in the module
+Pxp_yacc take a spec argument which
+can be customized. If your single class has the name c ,
+this argument should be
+
+
let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist: []
+ ()
+
+This means that data nodes will be created from the exemplar passed by
+~data_exemplar and that all element nodes will be made from the exemplar
+specified by ~default_element_exemplar. In ~element_alist, you can
+pass that different exemplars are to be used for different element types; but
+this is an optional feature. If you do not need it, pass the empty list.
Remember that an exemplar is a (node, extension) pair that serves as pattern
+when new nodes (and the corresponding extension objects) are added to the
+document tree. In this case, the exemplar contains c as
+extension, and when nodes are created, the exemplar is cloned, and cloning
+makes also a copy of c such that all nodes of the document
+tree will have a copy of c as extension.
The ~element_alist argument can bind
+specific element types to specific exemplars; as exemplars may be instances of
+different classes it is effectively possible to bind element types to
+classes. For example, if the element type "p" is implemented by class "c_p",
+and "q" is realized by "c_q", you can pass the following value:
+
+
let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist:
+ [ "p", new element_impl c_p;
+ "q", new element_impl c_q;
+ ]
+ ()
+
+The extension object
c is still used for all data nodes and
+for all other element types.
Prev Home Next The class type node Up Details of the mapping from XML text to the tree representation
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html
new file mode 100644
index 000000000..faea39fc6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html
@@ -0,0 +1,442 @@
+Details of the mapping from XML text to the tree representation The PXP user's guide Prev Chapter 3. The objects representing the document Next
If an element declaration does not allow the element to
+contain character data, the following rules apply.
If the element must be empty, i.e. it is declared with the
+keyword EMPTY , the element instance must be effectively
+empty (it must not even contain whitespace characters). The parser guarantees
+that a declared EMPTY element never contains a data
+node, even if the data node represents the empty string.
If the element declaration only permits other elements to occur
+within that element but not character data, it is still possible to insert
+whitespace characters between the subelements. The parser ignores these
+characters, too, and does not create data nodes for them.
Example. Consider the following element types:
+
+
<!ELEMENT x ( #PCDATA | z )* >
+<!ELEMENT y ( z )* >
+<!ELEMENT z EMPTY>
+
+Only
x may contain character data, the keyword
+
#PCDATA indicates this. The other types are character-free.
The XML term
+
+
<x><z/> <z/></x>
+
+will be internally represented by an element node for
x
+with three subnodes: the first
z element, a data node
+containing the space character, and the second
z element.
+In contrast to this, the term
+
+
<y><z/> <z/></y>
+
+is represented by an element node for
y with only
+
two subnodes, the two
z elements. There
+is no data node for the space character because spaces are ignored in the
+character-free element
y .
The XML specification allows all Unicode characters in XML
+texts. This parser can be configured such that UTF-8 is used to represent the
+characters internally; however, the default character encoding is
+ISO-8859-1. (Currently, no other encodings are possible for the internal string
+representation; the type Pxp_types.rep_encoding enumerates
+the possible encodings. In principle, the parser could use any encoding that is
+ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
+ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
+encodings (or other multibyte encodings which are not ASCII-compatible) unless
+major parts of the parser are rewritten - unlikely...)
The internal encoding may be different from the external encoding (specified
+in the XML declaration <?xml ... encoding="..."?> ); in
+this case the strings are automatically converted to the internal encoding.
If the internal encoding is ISO-8859-1, it is possible that there are
+characters that cannot be represented. In this case, the parser ignores such
+characters and prints a warning (to the collect_warnings
+object that must be passed when the parser is called).
The XML specification allows lines to be separated by single LF
+characters, by CR LF character sequences, or by single CR
+characters. Internally, these separators are always converted to single LF
+characters.
The parser guarantees that there are never two adjacent data
+nodes; if necessary, data material that would otherwise be represented by
+several nodes is collapsed into one node. Note that you can still create node
+trees with adjacent data nodes; however, the parser does not return such trees.
Note that CDATA sections are not represented specially; such
+sections are added to the current data material that is being collected for the
+next data node.
Entities are not represented within
+documents! If the parser finds an entity reference in the document
+content, the reference is immediately expanded, and the parser reads the
+expansion text instead of the reference.
As attribute
+values are composed of Unicode characters, too, the same problems with the
+character encoding arise as for character material. Attribute values are
+converted to the internal encoding, too; and if there are characters that
+cannot be represented, these are dropped, and a warning is printed.
Attribute values are normalized before they are returned by
+methods like attribute . First, any remaining entity
+references are expanded; if necessary, expansion is performed recursively.
+Second, newline characters (any of LF, CR LF, or CR characters) are converted
+to single space characters. Note that especially the latter action is
+prescribed by the XML standard (but the character reference &#10; is not
+converted, so it is still possible to include line feeds in attributes).
Processing instructions are parsed to some extent: The first word of the
+PI is called the target, and it is stored separated from the rest of the PI:
+
+
<?target rest?>
+
+The exact location where a PI occurs is not represented (by default). The
+parser puts the PI into the object that represents the embracing construct (an
+element, a DTD, or the whole document); that means you can find out which PIs
+occur in a certain element, in the DTD, or in the whole document, but you
+cannot lookup the exact position within the construct.
If you require the exact location of PIs, it is possible to
+create extra nodes for them. This mode is controlled by the option
+enable_pinstr_nodes . The additional nodes have the node type
+T_pinstr target , and are created
+from special exemplars contained in the spec (see
+pxp_document.mli).
Normally, comments are not represented; they are dropped by
+default. However, if you require them, it is possible to create
+T_comment nodes for them. This mode can be specified by the
+option enable_comment_nodes . Comment nodes are created from
+special exemplars contained in the spec (see
+pxp_document.mli). You can access the contents of comments through the
+method comment .
The attributes xml:lang and xml:space are not supported specially; they are handled
+like any other attribute.
Currently, there is no special support for namespaces.
+However, the parser allows the colon to occur in names, so that it is
+possible to implement namespaces on top of the current API.
Some future release of PXP will support namespaces as built-in
+feature...
Prev Home Next The class type extension Up Configuring and calling the parser
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html
new file mode 100644
index 000000000..06b1e60ea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html
@@ -0,0 +1,895 @@
+Resolvers and sources The PXP user's guide Prev Chapter 4. Configuring and calling the parser Next
The type source enumerates the two
+possibilities where the document to parse comes from.
+
+
type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+
+You normally need not worry about this type as there are convenience
+functions that create
source values:
+
+
+
from_file s : The document is read from
+file s ; you may specify absolute or relative path names.
+The file name must be encoded as a UTF-8 string.
There is an optional argument ~system_encoding
+specifying the character encoding which is used for the names of the file
+system. For example, if this encoding is ISO-8859-1 and s is
+also an ISO-8859-1 string, you can form the source:
+
+
let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
+from_file ~system_encoding:`Enc_iso88591 s_utf8 This source has the advantage that
+it is able to resolve inner external entities; i.e. if your document includes
+data from another file (using the SYSTEM attribute), this
+mode will find that file. However, this mode cannot resolve
+PUBLIC identifiers nor SYSTEM identifiers
+other than "file:".
from_channel ch : The document is read
+from the channel ch . In general, this source also supports
+file URLs found in the document; however, by default only absolute URLs are
+understood. It is possible to associate an ID with the channel such that the
+resolver knows how to interpret relative URLs:
+
+
from_channel ~id:(System "file:///dir/dir1/") ch
+
+There is also the ~system_encoding argument specifying how file names are
+encoded. - The example from above can also be written (but it is no
+longer possible to interpret relative URLs because there is no ~id argument,
+and computing this argument is relatively complicated because it must
+be a valid URL):
+
+let ch = open_in s in
+let src = from_channel ~system_encoding:`Enc_iso88591 ch in
+...;
+close_in ch from_string s : The string
+s is the document to parse. This mode is not able to
+interpret file names of SYSTEM clauses, nor can it look up
+PUBLIC identifiers.
Normally, the encoding of the string is detected as usual
+by analyzing the XML declaration, if any. However, it is also possible to
+specify the encoding directly:
+
+
let src = from_string ~fixenc:`Enc_iso88592 s ExtID (id, r) : The document to parse
+is denoted by the identifier id (either a
+SYSTEM or PUBLIC clause), and this
+identifier is interpreted by the resolver r . Use this mode
+if you have written your own resolver.
Which character sets are possible depends on the passed
+resolver r .
Entity (get_entity, r) : The document
+to parse is returned by the function invocation get_entity
+dtd , where dtd is the DTD object to use (it may be
+empty). Inner external references occurring in this entity are resolved using
+the resolver r .
Which character sets are possible depends on the passed
+resolver r .
A resolver is an object that can be opened like a file, but you
+do not pass the file name to the resolver, but the XML identifier of the entity
+to read from (either a SYSTEM or PUBLIC
+clause). When opened, the resolver must return the
+Lexing.lexbuf that reads the characters. The resolver can
+be closed, and it can be cloned. Furthermore, it is possible to tell the
+resolver which character set it should assume. - The following is taken from Pxp_reader:
+
+
exception Not_competent
+exception Not_resolvable of exn
+
+class type resolver =
+ object
+ method init_rep_encoding : rep_encoding -> unit
+ method init_warner : collect_warnings -> unit
+ method rep_encoding : rep_encoding
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+ method clone : resolver
+ method close_all : unit
+ end
+
+The resolver object must work as follows:
When the parser is called, it tells the resolver the
+warner object and the internal encoding by invoking
+init_warner and init_rep_encoding . The
+resolver should store these values. The method rep_encoding
+should return the internal encoding.
If the parser wants to read from the resolver, it invokes
+the method open_in . Either the resolver succeeds, in which
+case the Lexing.lexbuf reading from the file or stream must
+be returned, or opening fails. In the latter case the method implementation
+should raise an exception (see below).
If the parser finishes reading, it calls the
+close_in method.
If the parser finds a reference to another external
+entity in the input stream, it calls clone to get a second
+resolver which must be initially closed (not yet connected with an input
+stream). The parser then invokes open_in and the other
+methods as described.
If you already know the character set of the input
+stream, you should recode it to the internal encoding, and define the method
+change_encoding as an empty method.
If you want to support multiple external character sets,
+the object must follow a much more complicated protocol. Directly after
+open_in has been called, the resolver must return a lexical
+buffer that only reads one byte at a time. This is only possible if you create
+the lexical buffer with Lexing.from_function ; the function
+must then always return 1 if the EOF is not yet reached, and 0 if EOF is
+reached. If the parser has read the first line of the document, it will invoke
+change_encoding to tell the resolver which character set to
+assume. From this moment, the object can return more than one byte at once. The
+argument of change_encoding is either the parameter of the
+"encoding" attribute of the XML declaration, or the empty string if there is
+not any XML declaration or if the declaration does not contain an encoding
+attribute.
At the beginning the resolver must only return one
+character every time something is read from the lexical buffer. The reason for
+this is that you otherwise would not exactly know at which position in the
+input stream the character set changes.
If you want automatic recognition of the character set,
+it is up to the resolver object to implement this.
If an error occurs, the parser calls the method
+close_all for the top-level resolver; this method should
+close itself (if not already done) and all clones.
Exceptions. It is possible to chain resolvers such that when the first resolver is not able
+to open the entity, the other resolvers of the chain are tried in turn. The
+method open_in should raise the exception
+Not_competent to indicate that the next resolver should try
+to open the entity. If the resolver is able to handle the ID, but some other
+error occurs, the exception Not_resolvable should be raised
+to force that the chain breaks.
+
Example: How to define a resolver that is equivalent to
+from_string: ...
There are some classes in Pxp_reader that define common resolver behaviour.
+
+
class resolve_read_this_channel :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ ?auto_close:bool ->
+ in_channel ->
+ resolver
+
+Reads from the passed channel (it may be even a pipe). If the
+
~id argument is passed to the object, the created resolver
+accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has
+been cloned, it does not accept any ID. This means that this resolver cannot
+handle inner references to external entities. Note that you can combine this
+resolver with another resolver that can handle inner references (such as
+resolve_as_file); see class 'combine' below. - If you pass the
+
~fixenc argument, the encoding of the channel is set to the
+passed value, regardless of any auto-recognition or any XML declaration. - If
+
~auto_close = true (which is the default), the channel is
+closed after use. If
~auto_close = false , the channel is
+left open.
+
class resolve_read_any_channel :
+ ?auto_close:bool ->
+ channel_of_id:(ext_id -> (in_channel * encoding option)) ->
+ resolver
+
+This resolver calls the function
~channel_of_id to open a
+new channel for the passed
ext_id . This function must either
+return the channel and the encoding, or it must fail with Not_competent. The
+function must return
None as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+
Some e if it is already known that the encoding of the
+channel is
e . If
~auto_close = true
+(which is the default), the channel is closed after use. If
+
~auto_close = false , the channel is left open.
class resolve_read_url_channel :
+ ?base_url:Neturl.url ->
+ ?auto_close:bool ->
+ url_of_id:(ext_id -> Neturl.url) ->
+ channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
+ resolver
+
+When this resolver gets an ID to read from, it calls the function
+
~url_of_id to get the corresponding URL. This URL may be a
+relative URL; however, a URL scheme must be used which contains a path. The
+resolver converts the URL to an absolute URL if necessary. The second
+function,
~channel_of_url , is fed with the absolute URL as
+input. This function opens the resource to read from, and returns the channel
+and the encoding of the resource.
Both functions, ~url_of_id and
+~channel_of_url , can raise Not_competent to indicate that
+the object is not able to read from the specified resource. However, there is a
+difference: A Not_competent from ~url_of_id is left as it
+is, but a Not_competent from ~channel_of_url is converted to
+Not_resolvable. So only ~url_of_id decides which URLs are
+accepted by the resolver and which not.
The function ~channel_of_url must return
+None as encoding if the default mechanism to recognize the
+encoding should be used. It must return Some e if it is
+already known that the encoding of the channel is e .
If ~auto_close = true (which is the default), the channel is
+closed after use. If ~auto_close = false , the channel is
+left open.
Objects of this class contain a base URL relative to which relative URLs are
+interpreted. When creating a new object, you can specify the base URL by
+passing it as ~base_url argument. When an existing object is
+cloned, the base URL of the clone is the URL of the original object. - Note
+that the term "base URL" has a strict definition in RFC 1808.
class resolve_read_this_string :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ string ->
+ resolver
+
+Reads from the passed string. If the
~id argument is passed
+to the object, the created resolver accepts only this ID. Otherwise all IDs are
+accepted. - Once the resolver has been cloned, it does not accept any ID. This
+means that this resolver cannot handle inner references to external
+entities. Note that you can combine this resolver with another resolver that
+can handle inner references (such as resolve_as_file); see class 'combine'
+below. - If you pass the
~fixenc argument, the encoding of
+the string is set to the passed value, regardless of any auto-recognition or
+any XML declaration.
class resolve_read_any_string :
+ string_of_id:(ext_id -> (string * encoding option)) ->
+ resolver
+
+This resolver calls the function
~string_of_id to get the
+string for the passed
ext_id . This function must either
+return the string and the encoding, or it must fail with Not_competent. The
+function must return
None as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+
Some e if it is already known that the encoding of the
+string is
e .
class resolve_as_file :
+ ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?system_encoding:encoding ->
+ ?url_of_id:(ext_id -> Neturl.url) ->
+ ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+ unit ->
+ resolver
+Reads from the local file system. Every file name is interpreted as
+file name of the local file system, and the referred file is read.
The full form of a file URL is: file://host/path, where
+'host' specifies the host system where the file identified by 'path'
+resides. host = "" or host = "localhost" are accepted; other values
+will raise Not_competent. The standard for file URLs is
+defined in RFC 1738.
Option ~file_prefix : Specifies how the "file:" prefix of
+file names is handled:
+
`Not_recognized: The prefix is not
+recognized.
`Allowed: The prefix is allowed but
+not required (the default).
`Required: The prefix is
+required.
Option ~host_prefix: Specifies how the "//host" phrase of
+file names is handled:
+
`Not_recognized: The prefix is not
+recognized.
`Allowed: The prefix is allowed but
+not required (the default).
`Required: The prefix is
+required.
Option ~system_encoding: Specifies the encoding of file
+names of the local file system. Default: UTF-8.
Options ~url_of_id , ~channel_of_url : Not
+for the casual user!
class combine :
+ ?prefer:resolver ->
+ resolver list ->
+ resolver
+
+Combines several resolver objects. If a concrete entity with an
+
ext_id is to be opened, the combined resolver tries the
+contained resolvers in turn until a resolver accepts opening the entity
+(i.e. it does not raise Not_competent on open_in).
Clones: If the 'clone' method is invoked before 'open_in', all contained
+resolvers are cloned separately and again combined. If the 'clone' method is
+invoked after 'open_in' (i.e. while the resolver is open), additionally the
+clone of the active resolver is flagged as being preferred, i.e. it is tried
+first.
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html
new file mode 100644
index 000000000..34f09c208
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html
@@ -0,0 +1,517 @@
+The DTD classes The PXP user's guide Prev Chapter 4. Configuring and calling the parser Next
Sorry, not yet
+written. Perhaps the interface definition of Pxp_dtd expresses the same:
(**********************************************************************)
+(* *)
+(* Pxp_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+ (* Creation:
+ * new dtd
+ * creates a new, empty DTD object without any declaration, without a root
+ * element, without an ID.
+ *)
+ Pxp_types.collect_warnings ->
+ Pxp_types.rep_encoding ->
+ object
+ method root : string option
+ (* get the name of the root element if present *)
+
+ method set_root : string -> unit
+ (* set the name of the root element. This method can be invoked
+ * only once
+ *)
+
+ method id : Pxp_types.dtd_id option
+ (* get the identifier for this DTD *)
+
+ method set_id : Pxp_types.dtd_id -> unit
+ (* set the identifier. This method can be invoked only once *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding used for character representation *)
+
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - elements and notations that have not been added may be used in an
+ * arbitrary way; the methods "element" and "notation" indicate this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary contents are allowed or not. *)
+
+ method standalone_declaration : bool
+ (* Whether there is a 'standalone' declaration or not. Strictly
+ * speaking, this declaration is not part of the DTD, but it is
+ * included here because of practical reasons.
+ * If not set, this property defaults to 'false'.
+ *)
+
+ method set_standalone_declaration : bool -> unit
+ (* Sets the 'standalone' declaration. *)
+
+
+ method add_element : dtd_element -> unit
+ (* add the given element declaration to this DTD. Raises Not_found
+ * if there is already an element declaration with the same name.
+ *)
+
+ method add_gen_entity : Pxp_entity.entity -> bool -> unit
+ (* add_gen_entity e extdecl:
+ * add the entity 'e' as general entity to this DTD (general entities
+ * are those represented by &name;). If there is already a declaration
+ * with the same name, the second definition is ignored; as exception from
+ * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+ * may only be redeclared with a definition that is equivalent to the
+ * standard definition; otherwise a Validation_error is raised.
+ *
+ * 'extdecl': 'true' indicates that the entity declaration occurs in
+ * an external entity. (Used for the standalone check.)
+ *)
+
+ method add_par_entity : Pxp_entity.entity -> unit
+ (* add the given entity as parameter entity to this DTD (parameter
+ * entities are those represented by %name;). If there is already a
+ * declaration with the same name, the second definition is ignored.
+ *)
+
+ method add_notation : dtd_notation -> unit
+ (* add the given notation to this DTD. If there is already a declaration
+ * with the same name, a Validation_error is raised.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* add the given processing instruction to this DTD. *)
+
+ method element : string -> dtd_element
+ (* looks up the element declaration with the given name. Raises
+ * Validation_error if the element cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method element_names : string list
+ (* returns the list of the names of all element declarations. *)
+
+ method gen_entity : string -> (Pxp_entity.entity * bool)
+ (* let e, extdecl = obj # gen_entity n:
+ * looks up the general entity 'e' with the name 'n'. Raises
+ * WF_error if the entity cannot be found.
+ * 'extdecl': indicates whether the entity declaration occurred in an
+ * external entity.
+ *)
+
+ method gen_entity_names : string list
+ (* returns the list of all general entity names *)
+
+ method par_entity : string -> Pxp_entity.entity
+ (* looks up the parameter entity with the given name. Raises
+ * WF_error if the entity cannot be found.
+ *)
+
+ method par_entity_names : string list
+ (* returns the list of all parameter entity names *)
+
+ method notation : string -> dtd_notation
+ (* looks up the notation declaration with the given name. Raises
+ * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method notation_names : string list
+ (* Returns the list of the names of all added notations *)
+
+ method pinstr : string -> proc_instruction list
+ (* looks up all processing instructions with the given target.
+ * The "target" is the identifier following "<?".
+ * Note: It is not possible to find out the exact position of the
+ * processing instruction.
+ *)
+
+ method pinstr_names : string list
+ (* Returns the list of the names (targets) of all added pinstrs *)
+
+ method validate : unit
+ (* ensures that the DTD is valid. This method is optimized such that
+ * actual validation is only performed if DTD has changed.
+ * If the DTD is invalid, mostly a Validation_error is raised,
+ * but other exceptions are possible, too.
+ *)
+
+ method only_deterministic_models : unit
+ (* Succeeds if all regexp content models are deterministic.
+ * Otherwise Validation_error.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+ (* write os enc doctype:
+ * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
+ * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+ * only the declarations are written (the material within the
+ * square brackets).
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+
+ (*----------------------------------------*)
+ method invalidate : unit
+ (* INTERNAL METHOD *)
+ method warner : Pxp_types.collect_warnings
+ (* INTERNAL METHOD *)
+ end
+
+
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string ->
+ (* Creation:
+ * new dtd_element init_dtd init_name:
+ * creates a new dtd_element object for init_dtd with init_name.
+ * The strings are represented in the same encoding as init_dtd.
+ *)
+ object
+
+ method name : string
+ (* returns the name of the declared element *)
+
+ method externally_declared : bool
+ (* returns whether the element declaration occurs in an external
+ * entity.
+ *)
+
+ method content_model : Pxp_types.content_model_type
+ (* get the content model of this element declaration, or Unspecified *)
+
+ method content_dfa : Pxp_dfa.dfa_definition option
+ (* return the DFA of the content model if there is a DFA, or None.
+ * A DFA exists only for regexp style content models which are
+ * deterministic.
+ *)
+
+ method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+ (* set_cm_and_extdecl cm extdecl:
+ * set the content model to 'cm'. Once the content model is not
+ * Unspecified, it cannot be set to a different value again.
+ * Furthermore, it is set whether the element occurs in an external
+ * entity ('extdecl').
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Return the encoding of the strings *)
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - attributes that have not been added may be used in an
+ * arbitrary way; the method "attribute" indicates this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary attributes are allowed or not. *)
+
+ method attribute : string ->
+ Pxp_types.att_type * Pxp_types.att_default
+ (* get the type and default value of a declared attribute, or raise
+ * Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_violates_standalone_declaration :
+ string -> string option -> bool
+ (* attribute_violates_standalone_declaration name v:
+ * Checks whether the attribute 'name' violates the "standalone"
+ * declaration if it has value 'v'.
+ * The method returns true if:
+ * - The attribute declaration occurs in an external entity,
+ * and if one of the two conditions holds:
+ * - v = None, and there is a default for the attribute value
+ * - v = Some s, and the type of the attribute is not CDATA,
+ * and s changes if normalized according to the rules of the
+ * attribute type.
+ *
+ * The method raises Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_names : string list
+ (* get the list of all declared attributes *)
+
+ method names_of_required_attributes : string list
+ (* get the list of all attributes that are specified as required
+ * attributes
+ *)
+
+ method id_attribute_name : string option
+ (* Returns the name of the attribute with type ID, or None. *)
+
+ method idref_attribute_names : string list
+ (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+ method add_attribute : string ->
+ Pxp_types.att_type ->
+ Pxp_types.att_default ->
+ bool ->
+ unit
+ (* add_attribute name type default extdecl:
+ * add an attribute declaration for an attribute with the given name,
+ * type, and default value. If there is more than one declaration for
+ * an attribute name, the first declaration counts; the other declarations
+ * are ignored.
+ * 'extdecl': if true, the attribute declaration occurs in an external
+ * entity. This property is used to check the "standalone" attribute.
+ *)
+
+ method validate : unit
+ (* checks whether this element declaration (i.e. the content model and
+ * all attribute declarations) is valid for the associated DTD.
+ * Raises mostly Validation_error if the validation fails.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new dtd_notation a_name an_external_ID init_encoding
+ * creates a new dtd_notation object with the given name and the given
+ * external ID.
+ *)
+ object
+ method name : string
+ method ext_id : Pxp_types.ext_id
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
+ * string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new proc_instruction a_target a_value init_encoding
+ * creates a new proc_instruction object with the given target string and
+ * the given value string.
+ * Note: A processing instruction is written as <?target value?>.
+ *)
+ object
+ method target : string
+ method value : string
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ method parse_pxp_option : (string * string * (string * string) list)
+ (* Parses a PI containing a PXP option. Such PIs are formed like:
+ * <?target option-name option-att="value" option-att="value" ... ?>
+ * The method returns a triple
+ * (target, option-name, [option-att, value; ...])
+ * or raises Error.
+ *)
+
+ end
+
+;;
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html
new file mode 100644
index 000000000..b289a3674
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html
@@ -0,0 +1,779 @@
+Invoking the parser The PXP user's guide Prev Chapter 4. Configuring and calling the parser Next
Here is a description of Pxp_yacc.
The following defaults are available:
+
+
val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec In the following, the term "closed document" refers to
+an XML structure like
+
+
<!DOCTYPE ... [ declarations ] >
+<root >
+...
+</root >
+
+The term "fragment" refers to an XML structure like
+
+
<root >
+...
+</root >
+
+i.e. only to one isolated element instance.
val parse_dtd_entity : config -> source -> dtd
+
+Parses the declarations which are contained in the entity, and returns them as
+
dtd object.
val extract_dtd_from_document_entity : config -> source -> dtd
+
+Extracts the DTD from a closed document. Both the internal and the external
+subsets are extracted and combined to one
dtd object. This
+function does not parse the whole document, but only the parts that are
+necessary to extract the DTD.
val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document
+
+Parses a closed document and validates it against the DTD that is contained in
+the document (internal and external subsets). The option
+
~transform_dtd can be used to transform the DTD in the
+document, and to use the transformed DTD for validation. If
+
~id_index is specified, an index of all ID attributes is
+created.
val parse_wfdocument_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document
+
+Parses a closed document, but checks it only on well-formedness.
val parse_content_entity :
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ dtd ->
+ 'ext spec ->
+ 'ext node
+
+Parses a fragment, and validates the element.
val parse_wfcontent_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext node
+
+Parses a fragment, but checks it only on well-formedness.
type config =
+ { warner : collect_warnings;
+ errors_with_line_numbers : bool;
+ enable_pinstr_nodes : bool;
+ enable_super_root_node : bool;
+ enable_comment_nodes : bool;
+ encoding : rep_encoding;
+ recognize_standalone_declaration : bool;
+ store_element_positions : bool;
+ idref_pass : bool;
+ validate_by_dfa : bool;
+ accept_only_deterministic_models : bool;
+ ...
+ }
+
+
warner: The parser prints
+warnings by invoking the method warn for this warner
+object. (Default: all warnings are dropped)
errors_with_line_numbers: If
+true, errors contain line numbers; if false, errors contain only byte
+positions. The latter mode is faster. (Default: true)
enable_pinstr_nodes: If true,
+the parser creates extra nodes for processing instructions. If false,
+processing instructions are simply added to the element or document surrounding
+the instructions. (Default: false)
enable_super_root_node: If
+true, the parser creates an extra node which is the parent of the root of the
+document tree. This node is called super root; it is an element with type
+T_super_root . - If there are processing instructions outside
+the root element and outside the DTD, they are added to the super root instead
+of the document. - If false, the super root node is not created. (Default:
+false)
enable_comment_nodes: If true,
+the parser creates nodes for comments with type T_comment ;
+if false, such nodes are not created. (Default: false)
encoding: Specifies the
+internal encoding of the parser. Most strings are then represented according to
+this encoding; however there are some exceptions (especially
+ext_id values which are always UTF-8 encoded).
+(Default: `Enc_iso88591)
recognize_standalone_declaration: If true and if the parser is
+validating, the standalone="yes" declaration forces that it
+is checked whether the document is a standalone document. - If false, or if the
+parser is in well-formedness mode, such declarations are ignored.
+(Default: true)
store_element_positions: If
+true, for every non-data node the source position is stored. If false, the
+position information is lost. If available, you can get the positions of nodes
+by invoking the position method.
+(Default: true)
idref_pass: If true and if
+there is an ID index, the parser checks whether every IDREF or IDREFS attribute
+refers to an existing node; this requires that the parser traverses the whole
+document tree. If false, this check is left out. (Default: false)
validate_by_dfa: If true and if
+the content model for an element type is deterministic, a deterministic finite
+automaton is used to validate whether the element contents match the content
+model of the type. If false, or if a DFA is not available, a backtracking
+algorithm is used for validation. (Default: true)
accept_only_deterministic_models: If true, only deterministic content
+models are accepted; if false, any syntactically correct content models can be
+processed. (Default: true)
First, I recommend to vary the default configuration instead of
+creating a new configuration record. For instance, to set
+idref_pass to true , change the default
+as in:
+
let config = { default_config with idref_pass = true }
+The background is that I can add more options to the record in future versions
+of the parser without breaking your programs.
Do I need extra nodes for processing instructions? By default, such nodes are not created. This does not mean that the
+processing instructions are lost; however, you cannot find out the exact
+location where they occur. For example, the following XML text
+
+
<x><?pi1?><y/><?pi2?></x>
+
+will normally create one element node for
x containing
+
one subnode for
y . The processing
+instructions are attached to
x in a separate hash table; you
+can access them using
x # pinstr "pi1" and
x #
+pinstr "pi2" , respectively. The information is lost where the
+instructions occur within
x .
If the option enable_pinstr_nodes is
+turned on, the parser creates extra nodes pi1 and
+pi2 such that the subnodes of x are now:
+
+
x # sub_nodes = [ pi1; y; pi2 ]
+
+The extra nodes contain the processing instructions in the usual way, i.e. you
+can access them using
pi1 # pinstr "pi1" and
pi2 #
+pinstr "pi2" , respectively.
Note that you will need an exemplar for the PI nodes (see
+make_spec_from_alist ).
Do I need a super root node? By default, there is no super root node. The
+document object refers directly to the node representing the
+root element of the document, i.e.
+
+
doc # root = r
+
+if
r is the root node. This is sometimes inconvenient: (1)
+Some algorithms become simpler if every node has a parent, even the root
+node. (2) Some standards such as XPath call the "root node" the node whose
+child represents the root of the document. (3) The super root node can serve
+as a container for processing instructions outside the root element. Because of
+these reasons, it is possible to create an extra super root node, whose child
+is the root node:
+
+
doc # root = sr &&
+sr # sub_nodes = [ r ]
+
+When extra nodes are also created for processing instructions, these nodes can
+be added to the super root node if they occur outside the root element (reason
+(3)), and the order reflects the order in the source text.
Note that you will need an exemplar for the super root node
+(see make_spec_from_alist ).
What is the effect of the UTF-8 encoding? By default, the parser represents strings (with few
+exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
+and fonts for this encoding.
However, internationalization may require that you switch over
+to UTF-8 encoding. In most environments, the immediate effect will be that you
+cannot read strings with character codes >= 160 any longer; your terminal will
+only show funny glyph combinations. It is strongly recommended to install
+Unicode fonts (GNU Unifont ,
+Markus Kuhn's fonts ) and terminal emulators
+that can handle UTF-8 byte sequences . Furthermore, a Unicode editor may
+be helpful (such as Yudit ). There are
+also FAQ by
+Markus Kuhn.
By setting encoding to
+`Enc_utf8 all strings originating from the parsed XML
+document are represented as UTF-8 strings. This includes not only character
+data and attribute values but also element names, attribute names and so on, as
+it is possible to use any Unicode letter to form such names. Strictly
+speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
+will have difficulties when validating documents containing
+non-ISO-8859-1-names.
This mode does not have any impact on the external
+representation of documents. The character set assumed when reading a document
+is set in the XML declaration, and character set when writing a document must
+be passed to the write method.
How do I check that nodes exist which are referred by IDREF attributes? First, you must create an index of all occurring ID
+attributes:
+
+
let index = new hash_index
+
+This index must be passed to the parsing function:
+
+
parse_document_entity
+ ~id_index:(index :> index)
+ config source spec
+
+Next, you must turn on the
idref_pass mode:
+
+
let config = { default_config with idref_pass = true }
+
+Note that now the whole document tree will be traversed, and every node will be
+checked for IDREF and IDREFS attributes. If the tree is big, this may take some
+time.
What are deterministic content models? These types of models can speed up the validation checks;
+furthermore they ensure SGML-compatibility. In particular, a content model is
+deterministic if the parser can determine the actually used alternative by
+inspecting only the current token. For example, this element has
+non-deterministic contents:
+
+
<!ELEMENT x ((u,v) | (u,y+) | v)>
+
+If the first element in
x is
u , the
+parser does not know which of the alternatives
(u,v) or
+
(u,y+) will work; the parser must also inspect the second
+element to be able to distinguish between the alternatives. Because such
+look-ahead (or "guessing") is required, this example is
+non-deterministic.
The XML standard demands that content models must be
+deterministic. So it is recommended to turn the option
+accept_only_deterministic_models on; however, PXP can also
+process non-deterministic models using a backtracking algorithm.
Deterministic models ensure that validation can be performed in
+linear time. In order to get the maximum benefits, PXP also implements a
+special validator that profits from deterministic models; this is the
+deterministic finite automaton (DFA). This validator is enabled per element
+type if the element type has a deterministic model and if the option
+validate_by_dfa is turned on.
In general, I expect that the DFA method is faster than the
+backtracking method; especially in the worst case the DFA takes only linear
+time. However, if the content model has only few alternatives and the
+alternatives do not nest, the backtracking algorithm may be better.
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html
new file mode 100644
index 000000000..8fc856264
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html
@@ -0,0 +1,152 @@
+Updates The PXP user's guide Prev Chapter 4. Configuring and calling the parser
Some (often later added) features that are otherwise
+not explained in the manual but worth mentioning.
Methods node_position, node_path, nth_node,
+previous_node, next_node for nodes: See pxp_document.mli
Functions to determine the document order of nodes:
+compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html
new file mode 100644
index 000000000..dc9cc1e8c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html
@@ -0,0 +1,474 @@
+A complete example: The readme DTD The PXP user's guide Prev Chapter 1. What is XML? Next
The reason for readme was that I often wrote two versions
+of files such as README and INSTALL which explain aspects of a distributed
+software archive; one version was ASCII-formatted, the other was written in
+HTML. Maintaining both versions means double amount of work, and changes
+of one version may be forgotten in the other version. To improve this situation
+I invented the readme DTD which allows me to maintain only
+one source written as XML document, and to generate the ASCII and the HTML
+version from it.
In this section, I explain only the DTD. The readme DTD is
+contained in the PXP distribution together with the two converters to
+produce ASCII and HTML. Another section of this manual describes the HTML
+converter.
The documents have a simple structure: There are up to three levels of nested
+sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
+outermost element has usually the type readme , it is
+declared by
+
+
<!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+ title CDATA #REQUIRED>
+
+This means that this element contains one or more sections of the first level
+(element type
sect1 ), and that the element has a required
+attribute
title containing character data (CDATA). Note that
+
readme elements must not contain text data.
The three levels of sections are declared as follows:
+
+
<!ELEMENT sect1 (title,(sect2|p|ul)+)>
+
+<!ELEMENT sect2 (title,(sect3|p|ul)+)>
+
+<!ELEMENT sect3 (title,(p|ul)+)>
+
+Every section has a
title element as first subelement. After
+the title an arbitrary but non-empty sequence of inner sections, paragraphs and
+item lists follows. Note that the inner sections must belong to the next higher
+section level;
sect3 elements must not contain inner
+sections because there is no next higher level.
Obviously, all three declarations allow paragraphs (p ) and
+item lists (ul ). The definition can be simplified at this
+point by using a parameter entity:
+
+
<!ENTITY % p.like "p|ul">
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)>
+
+Here, the entity
p.like is nothing but a macro abbreviating
+the same sequence of declarations; if new elements on the same level as
+
p and
ul are later added, it is
+sufficient only to change the entity definition. Note that there are some
+restrictions on the usage of entities in this context; most important, entities
+containing a left parenthesis must also contain the corresponding right
+parenthesis.
Note that the entity p.like is a
+parameter entity, i.e. the ENTITY declaration contains a
+percent sign, and the entity is referred to by
+%p.like; . This kind of entity must be used to abbreviate
+parts of the DTD; the general entities declared without
+percent sign and referred to as &name; are not allowed
+in this context.
The title element specifies the title of the section in
+which it occurs. The title is given as character data, optionally interspersed
+with line breaks (br ):
+
+
<!ELEMENT title (#PCDATA|br)*>
+
+Compared with the
title attribute of
+the
readme element, this element allows inner markup
+(i.e.
br ) while attribute values do not: It is an error if
+an attribute value contains the left angle bracket < literally such that it
+is impossible to include inner elements.
The paragraph element p has a structure similar to
+title , but it allows more inner elements:
+
+
<!ENTITY % text "br|code|em|footnote|a">
+
+<!ELEMENT p (#PCDATA|%text;)*>
+
+Line breaks do not have inner structure, so they are declared as being empty:
+
+
<!ELEMENT br EMPTY>
+
+This means that really nothing is allowed within
br ; you
+must always write
<br></br> or abbreviated
+
<br/> .
Code samples should be marked up by the code tag; emphasized
+text can be indicated by em :
+
+
<!ELEMENT code (#PCDATA)>
+
+<!ELEMENT em (#PCDATA|%text;)*>
+
+That
code elements are not allowed to contain further markup
+while
em elements do is a design decision by the author of
+the DTD.
Unordered lists simply consist of one or more list items, and a list item may
+contain paragraph-level material:
+
+
<!ELEMENT ul (li+)>
+
+<!ELEMENT li (%p.like;)*>
+
+Footnotes are described by the text of the note; this text may contain
+text-level markup. There is no mechanism to describe the numbering scheme of
+footnotes, or to specify how footnote references are printed.
+
+
<!ELEMENT footnote (#PCDATA|%text;)*>
+
+Hyperlinks are written as in HTML. The anchor tag contains the text describing
+where the link points to, and the
href attribute is the
+pointer (as URL). There is no way to describe locations of "hash marks". If the
+link refers to another
readme document, the attribute
+
readmeref should be used instead of
href .
+The reason is that the converted document has usually a different system
+identifier (file name), and the link to a converted document must be
+converted, too.
+
+
<!ELEMENT a (#PCDATA)*>
+<!ATTLIST a
+ href CDATA #IMPLIED
+ readmeref CDATA #IMPLIED
+>
+
+Note that although it is only sensible to specify one of the two attributes,
+the DTD has no means to express this restriction.
So far the DTD. Finally, here is a document for it:
+
+
<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd">
+<readme title="How to use the readme converters">
+<sect1>
+ <title>Usage</title>
+ <p>
+ The <em>readme</em> converter is invoked on the command line by:
+ </p>
+ <p>
+ <code>readme [ -text | -html ] input.xml</code>
+ </p>
+ <p>
+ Here a list of options:
+ </p>
+ <ul>
+ <li>
+ <p><code>-text</code>: specifies that ASCII output should be produced</p>
+ </li>
+ <li>
+ <p><code>-html</code>: specifies that HTML output should be produced</p>
+ </li>
+ </ul>
+ <p>
+ The input file must be given on the command line. The converted output is
+ printed to <em>stdout</em>.
+ </p>
+</sect1>
+<sect1>
+ <title>Author</title>
+ <p>
+ The program has been written by
+ <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
+ </p>
+</sect1>
+</readme>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html
new file mode 100644
index 000000000..f2dcdd79b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html
@@ -0,0 +1,765 @@
+How to parse a document from an application The PXP user's guide Prev Chapter 2. Using PXP Next
Let me first give a rough overview of the object model of the parser. The
+following items are represented by objects:
+
+
Documents: The document representation is more or less the
+anchor for the application; all accesses to the parsed entities start here. It
+is described by the class document contained in the module
+Pxp_document . You can get some global information, such
+as the XML declaration the document begins with, the DTD of the document,
+global processing instructions, and most important, the document tree.
The contents of documents: The contents have the structure
+of a tree: Elements contain other elements and text[1] .
+
+The common type to represent both kinds of content is node
+which is a class type that unifies the properties of elements and character
+data. Every node has a list of children (which is empty if the element is empty
+or the node represents text); nodes may have attributes; nodes have always text
+contents. There are two implementations of node , the class
+element_impl for elements, and the class
+data_impl for text data. You find these classes and class
+types in the module Pxp_document , too.
Note that attribute lists are represented by non-class values.
The node extension: For advanced usage, every node of the
+document may have an associated extension which is simply
+a second object. This object must have the three methods
+clone , node , and
+set_node as bare minimum, but you are free to add methods as
+you want. This is the preferred way to add functionality to the document
+tree[2] . The class type extension is
+defined in Pxp_document , too.
The DTD: Sometimes it is necessary to access the DTD of a
+document; the average application does not need this feature. The class
+dtd describes DTDs, and makes it possible to get
+representations of element, entity, and notation declarations as well as
+processing instructions contained in the DTD. This class, and
+dtd_element , dtd_notation , and
+proc_instruction can be found in the module
+Pxp_dtd . There are a couple of classes representing
+different kinds of entities; these can be found in the module
+Pxp_entity .
+
+Additionally, the following modules play a role:
+
+
Pxp_yacc: Here the main parsing functions such as
+parse_document_entity are located. Some additional types and
+functions allow the parser to be configured in a non-standard way.
Pxp_types: This is a collection of basic types and
+exceptions.
+
+There are some further modules that are needed internally but are not part of
+the API.
Let the document to be parsed be stored in a file called
+doc.xml . The parsing process is started by calling the
+function
+
+
val parse_document_entity : config -> source -> 'ext spec -> 'ext document
+
+defined in the module
Pxp_yacc . The first argument
+specifies some global properties of the parser; it is recommended to start with
+the
default_config . The second argument determines where the
+document to be parsed comes from; this may be a file, a channel, or an entity
+ID. To parse
doc.xml , it is sufficient to pass
+
from_file "doc.xml" .
The third argument passes the object specification to use. Roughly
+speaking, it determines which classes implement the node objects of which
+element types, and which extensions are to be used. The 'ext
+polymorphic variable is the type of the extension. For the moment, let us
+simply pass default_spec as this argument, and ignore it.
So the following expression parses doc.xml :
+
+
open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec
+
+Note that
default_config implies that warnings are collected
+but not printed. Errors raise one of the exceptions defined in
+
Pxp_types ; to get readable errors and warnings catch the
+exceptions as follows:
+
+
class warner =
+ object
+ method warn w =
+ print_endline ("WARNING: " ^ w)
+ end
+;;
+
+try
+ let config = { default_config with warner = new warner } in
+ let d = parse_document_entity config (from_file "doc.xml") default_spec
+ in
+ ...
+with
+ e ->
+ print_endline (Pxp_types.string_of_exn e)
+
+Now
d is an object of the
document
+class. If you want the node tree, you can get the root element by
+
+
let root = d # root
+
+and if you would rather like to access the DTD, determine it by
+
+
let dtd = d # dtd
+
+As it is more interesting, let us investigate the node tree now. Given the root
+element, it is possible to recursively traverse the whole tree. The children of
+a node
n are returned by the method
+
sub_nodes , and the type of a node is returned by
+
node_type . This function traverses the tree, and prints the
+type of each node:
+
+
let rec print_structure n =
+ let ntype = n # node_type in
+ match ntype with
+ T_element name ->
+ print_endline ("Element of type " ^ name);
+ let children = n # sub_nodes in
+ List.iter print_structure children
+ | T_data ->
+ print_endline "Data"
+ | _ ->
+ (* Other node types are not possible unless the parser is configured
+ differently.
+ *)
+ assert false
+
+You can call this function by
+
+
print_structure root
+
+The type returned by
node_type is either
T_element
+name or
T_data . The
name of the
+element type is the string included in the angle brackets. Note that only
+elements have children; data nodes are always leaves of the tree.
There are some more methods in order to access a parsed node tree:
+
+
n # parent : Returns the parent node, or raises
+Not_found if the node is already the root
n # root : Returns the root of the node tree.
n # attribute a : Returns the value of the attribute with
+name a . The method returns a value for every
+declared attribute, independently of whether the attribute
+instance is defined or not. If the attribute is not declared,
+Not_found will be raised. (In well-formedness mode, every
+attribute is considered as being implicitly declared with type
+CDATA .)
The following return values are possible: Value s ,
+Valuelist sl , and Implied_value .
+The first two value types indicate that the attribute value is available,
+either because there is a definition
+a ="value "
+in the XML text, or because there is a default value (declared in the
+DTD). Only if both the instance definition and the default declaration are
+missing, the latter value Implied_value will be returned.
In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
+IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
+Value s back, where s is the normalized
+string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
+represent list values, and the parser splits the XML literal into several
+tokens and returns these tokens as Valuelist sl .
Normalization means that entity references (the
+&name ; tokens) and
+character references
+(&#number ; ) are replaced
+by the text they represent, and that white space characters are converted into
+plain spaces.
n # data : Returns the character data contained in the
+node. For data nodes, the meaning is obvious as this is the main content of
+data nodes. For element nodes, this method returns the concatenated contents of
+all inner data nodes.
Note that entity references included in the text are resolved while they are
+being parsed; for example the text "a &lt;&gt; b" will be returned
+as "a <> b" by this method. Spaces of data nodes are always
+preserved. Newlines are preserved, but always converted to \n characters even
+if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
+data nodes because the parser collapses all data material at one location into
+one node. (However, if you create your own tree or transform the parsed tree,
+it is possible to have adjacent data nodes.)
Note that elements that do not allow #PCDATA as content
+will not have data nodes as children. This means that spaces and newlines, the
+only character material allowed for such elements, are silently dropped.
+
+For example, if the task is to print all contents of elements with type
+"valuable" whose attribute "priority" is "1", this function can help:
+
+
let rec print_valuable_prio1 n =
+ let ntype = n # node_type in
+ match ntype with
+ T_element "valuable" when n # attribute "priority" = Value "1" ->
+ print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ | _ ->
+ assert false
+
+You can call this function by:
+
+
print_valuable_prio1 root
+
+If you like a DSSSL-like style, you can make the function
+
process_children explicit:
+
+
let rec print_valuable_prio1 n =
+
+ let process_children n =
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ in
+
+ let ntype = n # node_type in
+ match ntype with
+ T_element "valuable" when n # attribute "priority" = Value "1" ->
+ print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ process_children n
+ | _ ->
+ assert false
+
+So far, O'Caml is now a simple "style-sheet language": You can form a big
+"match" expression to distinguish between all significant cases, and provide
+different reactions on different conditions. But this technique has
+limitations; the "match" expression tends to get larger and larger, and it is
+difficult to store intermediate values as there is only one big
+recursion. Alternatively, it is also possible to represent the various cases as
+classes, and to use dynamic method lookup to find the appropriate class. The
+next section explains this technique in detail.
Notes [1] Elements may
+also contain processing instructions. Unlike other document models, PXP
+separates processing instructions from the rest of the text and provides a
+second interface to access them (method pinstr ). However,
+there is a parser option (enable_pinstr_nodes ) which changes
+the behaviour of the parser such that extra nodes for processing instructions
+are included into the tree.
Furthermore, the tree does normally not contain nodes for XML comments;
+they are ignored by default. Again, there is an option
+(enable_comment_nodes ) changing this.
[2] Due to the typing system it is more or less impossible to
+derive recursive classes in O'Caml. To get around this, it is common practice
+to put the modifiable or extensible part of recursive objects into parallel
+objects.
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html
new file mode 100644
index 000000000..cf3f4737c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html
@@ -0,0 +1,538 @@
+Class-based processing of the node tree The PXP user's guide Prev Chapter 2. Using PXP Next
By default, the parsed node tree consists of objects of the same class; this is
+a good design as long as you want only to access selected parts of the
+document. For complex transformations, it may be better to use different
+classes for objects describing different element types.
For example, if the DTD declares the element types a ,
+b , and c , and if the task is to convert
+an arbitrary document into a printable format, the idea is to define for every
+element type a separate class that has a method print . The
+classes are eltype_a , eltype_b , and
+eltype_c , and every class implements
+print such that elements of the type corresponding to the
+class are converted to the output format.
The parser supports such a design directly. As it is impossible to derive
+recursive classes in O'Caml[1] , the specialized element classes cannot be formed by
+simply inheriting from the built-in classes of the parser and adding methods
+for customized functionality. To get around this limitation, every node of the
+document tree is represented by two objects, one called
+"the node" and containing the recursive definition of the tree, one called "the
+extension". Every node object has a reference to the extension, and the
+extension has a reference to the node. The advantage of this model is that it
+is now possible to customize the extension without affecting the typing
+constraints of the recursive node definition.
Every extension must have the three methods clone ,
+node , and set_node . The method
+clone creates a deep copy of the extension object and
+returns it; node returns the node object for this extension
+object; and set_node is used to tell the extension object
+which node is associated with it, this method is automatically called when the
+node tree is initialized. The following definition is a good starting point
+for these methods; usually clone must be further refined
+when instance variables are added to the class:
+
+
class custom_extension =
+ object (self)
+
+ val mutable node = (None : custom_extension node option)
+
+ method clone = {< >}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ end
+
+This part of the extension is usually the same for all classes, so it is a good
+idea to consider
custom_extension as the super-class of the
+further class definitions. Continuing the example above, we can define the
+element type classes as follows:
+
+
class virtual custom_extension =
+ object (self)
+ ... clone, node, set_node defined as above ...
+
+ method virtual print : out_channel -> unit
+ end
+
+class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+The method
print can now be implemented for every element
+type separately. Note that you get the associated node by invoking
+
+
self # node
+
+and you get the extension object of a node
n by writing
+
+
n # extension
+
+It is guaranteed that
+
+
self # node # extension == self
+
+always holds.
Here are sample definitions of the print
+methods:
+
+
class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Nodes <a>...</a> are only containers: *)
+ output_string ch "(";
+ List.iter
+ (fun n -> n # extension # print ch)
+ (self # node # sub_nodes);
+ output_string ch ")";
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the value of the CDATA attribute "print": *)
+ match self # node # attribute "print" with
+ Value s -> output_string ch s
+ | Implied_value -> output_string ch "<missing>"
+ | Valuelist l -> assert false
+ (* not possible because the att is CDATA *)
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the contents of this element: *)
+ output_string ch (self # node # data)
+ end
+
+class null_extension =
+ object (self)
+ inherit custom_extension
+ method print ch = assert false
+ end The remaining task is to configure the parser such that these extension classes
+are actually used. Here another problem arises: It is not possible to
+dynamically select the class of an object to be created. As workaround,
+PXP allows the user to specify exemplar objects for
+the various element types; instead of creating the nodes of the tree by
+applying the new operator the nodes are produced by
+duplicating the exemplars. As object duplication preserves the class of the
+object, one can create fresh objects of every class for which previously an
+exemplar has been registered.
Exemplars are meant as objects without contents, the only interesting thing is
+that exemplars are instances of a certain class. The creation of an exemplar
+for an element node can be done by:
+
+
let element_exemplar = new element_impl extension_exemplar
+
+And a data node exemplar is created by:
+
+
let data_exemplar = new data_impl extension_exemplar
+
+The classes
element_impl and
data_impl
+are defined in the module
Pxp_document . The constructors
+initialize the fresh objects as empty objects, i.e. without children, without
+data contents, and so on. The
extension_exemplar is the
+initial extension object the exemplars are associated with.
Once the exemplars are created and stored somewhere (e.g. in a hash table), you
+can take an exemplar and create a concrete instance (with contents) by
+duplicating it. As user of the parser you are normally not concerned with this
+as this is part of the internal logic of the parser, but as background knowledge
+it is worthwhile to mention that the two methods
+create_element and create_data actually
+perform the duplication of the exemplar for which they are invoked,
+additionally apply modifications to the clone, and finally return the new
+object. Moreover, the extension object is copied, too, and the new node object
+is associated with the fresh extension object. Note that this is the reason why
+every extension object must have a clone method.
The configuration of the set of exemplars is passed to the
+parse_document_entity function as third argument. In our
+example, this argument can be set up as follows:
+
+
let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl (new null_extension))
+ ~default_element_exemplar: (new element_impl (new null_extension))
+ ~element_alist:
+ [ "a", new element_impl (new eltype_a);
+ "b", new element_impl (new eltype_b);
+ "c", new element_impl (new eltype_c);
+ ]
+ ()
+
+The
~element_alist function argument defines the mapping
+from element types to exemplars as associative list. The argument
+
~data_exemplar specifies the exemplar for data nodes, and
+the
~default_element_exemplar is used whenever the parser
+finds an element type for which the associative list does not define an
+exemplar.
The configuration is now complete. You can still use the same parsing
+functions, only the initialization is a bit different. For example, call the
+parser by:
+
+
let d = parse_document_entity default_config (from_file "doc.xml") spec
+
+Note that the resulting document
d has a usable type;
+especially the
print method we added is visible. So you can
+print your document by
+
+
d # root # extension # print stdout This object-oriented approach looks rather complicated; this is mostly caused
+by working around some problems of the strict typing system of O'Caml. Some
+auxiliary concepts such as extensions were needed, but the practical
+consequences are low. In the next section, one of the examples of the
+distribution is explained, a converter from readme
+documents to HTML.
Notes [1] The problem is that the subclass is
+usually not a subtype in this case because O'Caml has a contravariant subtyping
+rule.
Prev Home Next How to parse a document from an application Up Example: An HTML backend for the readme
+DTD
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html
new file mode 100644
index 000000000..674180172
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html
@@ -0,0 +1,1036 @@
+Example: An HTML backend for the readme
+DTD The PXP user's guide Prev Chapter 2. Using PXP Next
The converter from readme documents to HTML
+documents follows strictly the approach to define one class per element
+type. The HTML code is similar to the readme source,
+because of this most elements can be converted in the following way: Given the
+input element
+
+
<e>content</e>
+
+the conversion text is the concatenation of a computed prefix, the recursively
+converted content, and a computed suffix.
Only one element type cannot be handled by this scheme:
+footnote . Footnotes are collected while they are found in
+the input text, and they are printed after the main text has been converted and
+printed.
open Pxp_types
+open Pxp_document
class type footnote_printer =
+ object
+ method footnote_to_html : store_type -> out_channel -> unit
+ end
+
+and store_type =
+ object
+ method alloc_footnote : footnote_printer -> int
+ method print_footnotes : out_channel -> unit
+ end
+;; The store is a container for footnotes. You can add a
+footnote by invoking alloc_footnote ; the argument is an
+object of the class footnote_printer , the method returns the
+number of the footnote. The interesting property of a footnote is that it can
+be converted to HTML, so a footnote_printer is an object
+with a method footnote_to_html . The class
+footnote which is defined below has a compatible method
+footnote_to_html such that objects created from it can be
+used as footnote_printer s.
The other method, print_footnotes, prints the footnotes as a
+definition list, and is typically invoked after the main material of the page
+has already been printed. Every item of the list is printed by
+footnote_to_html .
class store =
+ object (self)
+
+ val mutable footnotes = ( [] : (int * footnote_printer) list )
+ val mutable next_footnote_number = 1
+
+ method alloc_footnote n =
+ let number = next_footnote_number in
+ next_footnote_number <- number+1;
+ footnotes <- footnotes @ [ number, n ];
+ number
+
+ method print_footnotes ch =
+ if footnotes <> [] then begin
+ output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
+ output_string ch "<dl>\n";
+ List.iter
+ (fun (_,n) ->
+ n # footnote_to_html (self : #store_type :> store_type) ch)
+ footnotes;
+ output_string ch "</dl>\n";
+ end
+
+ end
+;; This function converts the characters <, >, &, and " to their HTML
+representation. For example,
+escape_html "<>" = "&lt;&gt;" . Other
+characters are left unchanged.
+
+
let escape_html s =
+ Str.global_substitute
+ (Str.regexp "<\\|>\\|&\\|\"")
+ (fun s ->
+ match Str.matched_string s with
+ "<" -> "&lt;"
+ | ">" -> "&gt;"
+ | "&" -> "&amp;"
+ | "\"" -> "&quot;"
+ | _ -> assert false)
+ s
+;; This virtual class is the abstract superclass of the extension classes shown
+below. It defines the standard methods clone ,
+node , and set_node , and declares the type
+of the virtual method to_html . This method recursively
+traverses the whole element tree, and prints the converted HTML code to the
+output channel passed as second argument. The first argument is the reference
+to the global store object which collects the footnotes.
+
+
class virtual shared =
+ object (self)
+
+ (* --- default_ext --- *)
+
+ val mutable node = (None : shared node option)
+
+ method clone = {< >}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- virtual --- *)
+
+ method virtual to_html : store -> out_channel -> unit
+
+ end
+;; This class defines to_html such that the character data of
+the current node is converted to HTML. Note that self is an
+extension object, self # node is the node object, and
+self # node # data returns the character data of the node.
+
+
class only_data =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch (escape_html (self # node # data))
+ end
+;; This class converts elements of type readme to HTML. Such an
+element is (by definition) always the root element of the document. First, the
+HTML header is printed; the title attribute of the element
+determines the title of the HTML page. Some aspects of the HTML page can be
+configured by setting certain parameter entities, for example the background
+color, the text color, and link colors. After the header, the
+body tag, and the headline have been printed, the contents
+of the page are converted by invoking to_html on all
+children of the current node (which is the root node). Then, the footnotes are
+appended to this by telling the global store object to print
+the footnotes. Finally, the end tags of the HTML pages are printed.
This class is an example of how to access the value of an attribute: The value is
+determined by invoking self # node # attribute "title" . As
+this attribute has been declared as CDATA and as being required, the value has
+always the form Value s where s is the
+string value of the attribute.
You can also see how entity contents can be accessed. A parameter entity object
+can be looked up by self # node # dtd # par_entity "name" ,
+and by invoking replacement_text the value of the entity
+is returned after inner parameter and character entities have been
+processed. Note that you must use gen_entity instead of
+par_entity to access general entities.
class readme =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ (* output header *)
+ output_string
+ ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
+ output_string
+ ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
+ let title =
+ match self # node # attribute "title" with
+ Value s -> s
+ | _ -> assert false
+ in
+ let html_header, _ =
+ try (self # node # dtd # par_entity "readme:html:header")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_trailer, _ =
+ try (self # node # dtd # par_entity "readme:html:trailer")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_bgcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:bgcolor")
+ # replacement_text
+ with WF_error _ -> "white", false in
+ let html_textcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:textcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_alinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:alinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_vlinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_linkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:linkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_background, _ =
+ try (self # node # dtd # par_entity "readme:html:background")
+ # replacement_text
+ with WF_error _ -> "", false in
+
+ output_string ch "<html><header><title>\n";
+ output_string ch (escape_html title);
+ output_string ch "</title></header>\n";
+ output_string ch "<body ";
+ List.iter
+ (fun (name,value) ->
+ if value <> "" then
+ output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+ [ "bgcolor", html_bgcolor;
+ "text", html_textcolor;
+ "link", html_linkcolor;
+ "alink", html_alinkcolor;
+ "vlink", html_vlinkcolor;
+ ];
+ output_string ch ">\n";
+ output_string ch html_header;
+ output_string ch "<h1>";
+ output_string ch (escape_html title);
+ output_string ch "</h1>\n";
+ (* process main content: *)
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ (* now process footnotes *)
+ store # print_footnotes ch;
+ (* trailer *)
+ output_string ch html_trailer;
+ output_string ch "</html>\n";
+
+ end
+;; As the conversion process is very similar, the conversion classes of the three
+section levels are derived from the more general section
+class. The HTML code of the section levels only differs in the type of the
+headline, and because of this the classes describing the section levels can be
+computed by replacing the class argument the_tag of
+section by the HTML name of the headline tag.
Section elements are converted to HTML by printing a headline and then
+converting the contents of the element recursively. More precisely, the first
+sub-element is always a title element, and the other
+elements are the contents of the section. This structure is declared in the
+DTD, and it is guaranteed that the document matches the DTD. Because of this
+the title node can be separated from the rest without any checks.
Both the title node, and the body nodes are then converted to HTML by calling
+to_html on them.
class section the_tag =
+ object (self)
+ inherit shared
+
+ val tag = the_tag
+
+ method to_html store ch =
+ let sub_nodes = self # node # sub_nodes in
+ match sub_nodes with
+ title_node :: rest ->
+ output_string ch ("<" ^ tag ^ ">\n");
+ title_node # extension # to_html store ch;
+ output_string ch ("\n</" ^ tag ^ ">");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ rest
+ | _ ->
+ assert false
+ end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";; Several element types are converted to HTML by simply mapping them to
+corresponding HTML element types. The class map_tag
+implements this, and the class argument the_target_tag
+determines the tag name to map to. The output consists of the start tag, the
+recursively converted inner elements, and the end tag.
+
+
class map_tag the_target_tag =
+ object (self)
+ inherit shared
+
+ val target_tag = the_target_tag
+
+ method to_html store ch =
+ output_string ch ("<" ^ target_tag ^ ">\n");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n</" ^ target_tag ^ ">");
+ end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";; Elements of type br are mapped to the same HTML type. Note
+that HTML forbids the end tag of br .
+
+
class br =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch "<br>\n";
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ end
+;; The code type is converted to a pre
+section (preformatted text). As the meaning of tabs is unspecified in HTML,
+tabs are expanded to spaces.
+
+
class code =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ let data = self # node # data in
+ (* convert tabs *)
+ let l = String.length data in
+ let rec preprocess i column =
+ (* this is very inefficient but easy to understand: *)
+ if i < l then
+ match data.[i] with
+ '\t' ->
+ let n = 8 - (column mod 8) in
+ String.make n ' ' ^ preprocess (i+1) (column + n)
+ | '\n' ->
+ "\n" ^ preprocess (i+1) 0
+ | c ->
+ String.make 1 c ^ preprocess (i+1) (column + 1)
+ else
+ ""
+ in
+ output_string ch "<p><pre>";
+ output_string ch (escape_html (preprocess 0 0));
+ output_string ch "</pre></p>";
+
+ end
+;; Hyperlinks, expressed by the a element type, are converted
+to the HTML a type. If the target of the hyperlink is given
+by href , the URL of this attribute can be used
+directly. Alternatively, the target can be given by
+readmeref in which case the ".html" suffix must be added to
+the file name.
Note that within a only #PCDATA is allowed, so the contents
+can be converted directly by applying escape_html to the
+character data contents.
+
+
class a =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch "<a ";
+ let href =
+ match self # node # attribute "href" with
+ Value v -> escape_html v
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ begin match self # node # attribute "readmeref" with
+ Value v -> escape_html v ^ ".html"
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ ""
+ end
+ in
+ if href <> "" then
+ output_string ch ("href=\"" ^ href ^ "\"");
+ output_string ch ">";
+ output_string ch (escape_html (self # node # data));
+ output_string ch "</a>";
+
+ end
+;; The footnote class has two methods:
+to_html to convert the footnote reference to HTML, and
+footnote_to_html to convert the footnote text itself.
The footnote reference is converted to a local hyperlink; more precisely, to
+two anchor tags which are connected with each other. The text anchor points to
+the footnote anchor, and the footnote anchor points to the text anchor.
The footnote must be allocated in the store object. By
+allocating the footnote, you get the number of the footnote, and the text of
+the footnote is stored until the end of the HTML page is reached when the
+footnotes can be printed. The to_html method stores simply
+the object itself, such that the footnote_to_html method is
+invoked on the same object that encountered the footnote.
The to_html method only allocates the footnote, and prints the
+reference anchor, but it neither prints nor converts the contents of the
+note. This is deferred until the footnotes actually get printed, i.e. the
+recursive call of to_html on the sub nodes is done by
+footnote_to_html .
Note that this technique does not work if you make another footnote within a
+footnote; the second footnote gets allocated but not printed.
class footnote =
+ object (self)
+ inherit shared
+
+ val mutable footnote_number = 0
+
+ method to_html store ch =
+ let number =
+ store # alloc_footnote (self : #shared :> footnote_printer) in
+ let foot_anchor =
+ "footnote" ^ string_of_int number in
+ let text_anchor =
+ "textnote" ^ string_of_int number in
+ footnote_number <- number;
+ output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
+ foot_anchor ^ "\">[" ^ string_of_int number ^
+ "]</a>" )
+
+ method footnote_to_html store ch =
+ (* prerequisite: we are in a definition list <dl>...</dl> *)
+ let foot_anchor =
+ "footnote" ^ string_of_int footnote_number in
+ let text_anchor =
+ "textnote" ^ string_of_int footnote_number in
+ output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
+ text_anchor ^ "\">[" ^ string_of_int footnote_number ^
+ "]</a></dt>\n<dd>");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n</dd>")
+
+ end
+;; This code sets up the hash table that connects element types with the exemplars
+of the extension classes that convert the elements to HTML.
+
+
open Pxp_yacc
+
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new em));
+ "ul", (new element_impl (new ul));
+ "li", (new element_impl (new li));
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a));
+ ]
+ ()
+;; Prev Home Next Class-based processing of the node tree Up The objects representing the document
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html
new file mode 100644
index 000000000..cf177f88e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html
@@ -0,0 +1,2337 @@
+The class type node The PXP user's guide Prev Chapter 3. The objects representing the document Next
From Pxp_document :
+
+
type node_type =
+ T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+and some other, reserved types
+;;
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+
+ (* General observers *)
+
+ method extension : 'ext
+ method dtd : dtd
+ method parent : 'ext node
+ method root : 'ext node
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node -> unit) -> unit
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method node_type : node_type
+ method encoding : Pxp_types.rep_encoding
+ method data : string
+ method position : (string * int * int)
+ method comment : string option
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ (* Attribute observers *)
+
+ method attribute : string -> Pxp_types.att_value
+ method required_string_attribute : string -> string
+ method optional_string_attribute : string -> string option
+ method required_list_attribute : string -> string list
+ method optional_list_attribute : string -> string list
+ method attribute_names : string list
+ method attribute_type : string -> Pxp_types.att_type
+ method attributes : (string * Pxp_types.att_value) list
+ method id_attribute_name : string
+ method id_attribute_value : string
+ method idref_attribute_names : string list
+
+ (* Modifying methods *)
+
+ method add_node : ?force:bool -> 'ext node -> unit
+ method add_pinstr : proc_instruction -> unit
+ method delete : unit
+ method set_nodes : 'ext node list -> unit
+ method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ method set_comment : string option -> unit
+
+ (* Cloning methods *)
+
+ method orphaned_clone : 'self
+ method orphaned_flat_clone : 'self
+ method create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list ->
+ 'ext node
+ method create_data : dtd -> string -> 'ext node
+ method keep_always_whitespace_mode : unit
+
+ (* Validating methods *)
+
+ method local_validate : ?use_dfa:bool -> unit -> unit
+
+ (* ... Internal methods are undocumented. *)
+
+ end
+;;
+
+In the module
Pxp_types you can find another type
+definition that is important in this context:
+
+
type Pxp_types.att_value =
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+;; A node represents either an element or a character data section. There are two
+classes implementing the two aspects of nodes: element_impl
+and data_impl . The latter class does not implement all
+methods because some methods do not make sense for data nodes.
(Note: PXP also supports a mode which forces that processing instructions and
+comments are represented as nodes of the document tree. However, these nodes
+are instances of element_impl with node types
+T_pinstr and T_comment ,
+respectively. This mode must be explicitly configured; the basic representation
+knows only element and data nodes.)
The following figure
+(A tree with element nodes, data nodes, and attributes ) shows an example how
+a tree is constructed from element and data nodes. The circular areas
+represent element nodes whereas the ovals denote data nodes. Only elements
+may have subnodes; data nodes are always leaves of the tree. The subnodes
+of an element can be either element or data nodes; in both cases the O'Caml
+objects storing the nodes have the class type node .
Attributes (the clouds in the picture) are not directly
+integrated into the tree; there is always an extra link to the attribute
+list. This is also true for processing instructions (not shown in the
+picture). This means that there are separate access methods for attributes and
+processing instructions.
Figure 3-1. A tree with element nodes, data nodes, and attributes
Only elements, data sections, attributes and processing
+instructions (and comments, if configured) can, directly or indirectly, occur
+in the document tree. It is impossible to add entity references to the tree; if
+the parser finds such a reference, not the reference as such but the referenced
+text (i.e. the tree representing the structured text) is included in the
+tree.
Note that the parser collapses as much data material into one
+data node as possible such that there are normally never two adjacent data
+nodes. This invariant is enforced even if data material is included by entity
+references or CDATA sections, or if a data sequence is interrupted by
+comments. So a &amp; b <!-- comment --> c <![CDATA[
+<> d]]> is represented by only one data node, for
+instance. However, you can create document trees manually which break this
+invariant; it is only the way the parser forms the tree.
Figure 3-2. Nodes are doubly linked trees
The node tree has links in both directions: Every node has a link to its parent
+(if any), and it has links to the subnodes (see
+figure Nodes are doubly linked trees ). Obviously,
+this doubly-linked structure simplifies the navigation in the tree; but has
+also some consequences for the possible operations on trees.
Because every node must have at most one parent node,
+operations are illegal if they violate this condition. The following figure
+(A node can only be added if it is a root ) shows on the left side
+that node y is added to x as new subnode
+which is allowed because y does not have a parent yet. The
+right side of the picture illustrates what would happen if y
+had a parent node; this is illegal because y would have two
+parents after the operation.
Figure 3-3. A node can only be added if it is a root
The "delete" operation simply removes the links between two nodes. In the
+picture (A deleted node becomes the root of the subtree ) the node
+x is deleted from the list of subnodes of
+y . After that, x becomes the root of the
+subtree starting at this node.
Figure 3-4. A deleted node becomes the root of the subtree
It is also possible to make a clone of a subtree; illustrated in
+The clone of a subtree . In this case, the
+clone is a copy of the original subtree except that it is no longer a
+subnode. Because cloning never keeps the connection to the parent, the clones
+are called orphaned .
Figure 3-5. The clone of a subtree
General observers
+ .
extension : The reference to the extension object which
+belongs to this node (see ...).
dtd : Returns a reference to the global DTD. All nodes
+of a tree must share the same DTD.
parent : Gets the parent node. Raises
+Not_found in the case the node does not have a
+parent, i.e. the node is the root.
root : Gets the reference to the root node of the tree.
+Every node is contained in a tree with a root, so this method always
+succeeds. Note that this method searches the root,
+which costs time proportional to the length of the path to the root.
sub_nodes : Returns references to the children. The returned
+list reflects the order of the children. For data nodes, this method returns
+the empty list.
iter_nodes f : Iterates over the children, and calls
+f for every child in turn.
iter_nodes_sibl f : Iterates over the children, and calls
+f for every child in turn. f gets as
+arguments the previous node, the current node, and the next node.
node_type : Returns either T_data which
+means that the node is a data node, or T_element n
+which means that the node is an element of type n .
+If configured, possible node types are also T_pinstr t
+indicating that the node represents a processing instruction with target
+t , and T_comment in which case the node
+is a comment.
encoding : Returns the encoding of the strings.
data : Returns the character data of this node and all
+children, concatenated as one string. The encoding of the string is what
+the method encoding returns.
+- For data nodes, this method simply returns the represented characters.
+For elements, the meaning of the method has been extended such that it
+returns something useful, i.e. the effectively contained characters, without
+markup. (For T_pinstr and T_comment
+nodes, the method returns the empty string.)
position : If configured, this method returns the position of
+the element as triple (entity, line, byteposition). For data nodes, the
+position is not stored. If the position is not available the triple
+"?", 0, 0 is returned.
comment : Returns Some text for comment
+nodes, and None for other nodes. The text
+is everything between the comment delimiters <!-- and
+--> .
pinstr n : Returns all processing instructions that are
+directly contained in this element and that have a target
+specification of n . The target is the first word after
+the <? .
pinstr_names : Returns the list of all targets of processing
+instructions directly contained in this element.
write s enc : Prints the node and all subnodes to the passed
+output stream as valid XML text, using the passed external encoding.
+
Attribute observers
+ .
attribute n : Returns the value of the attribute with name
+n . This method returns a value for every declared
+attribute, and it raises Not_found for any undeclared
+attribute. Note that it even returns a value if the attribute is actually
+missing but is declared as #IMPLIED or has a default
+value. - Possible values are:
+
Implied_value : The attribute has been declared with the
+keyword #IMPLIED , and the attribute is missing in the
+attribute list of this element.
Value s : The attribute has been declared as type
+CDATA , as ID , as
+IDREF , as ENTITY , or as
+NMTOKEN , or as enumeration or notation, and one of the two
+conditions holds: (1) The attribute value is present in the attribute list in
+which case the value is returned in the string s . (2) The
+attribute has been omitted, and the DTD declared the attribute with a default
+value. The default value is returned in s .
+- Summarized, Value s is returned for non-implied, non-list
+attribute values.
Valuelist l : The attribute has been declared as type
+IDREFS , as ENTITIES , or
+as NMTOKENS , and one of the two conditions holds: (1) The
+attribute value is present in the attribute list in which case the
+space-separated tokens of the value are returned in the string list
+l . (2) The attribute has been omitted, and the DTD declared
+the attribute with a default value. The default value is returned in
+l .
+- Summarized, Valuelist l is returned for all list-type
+attribute values.
+
+Note that before the attribute value is returned, the value is normalized. This
+means that newlines are converted to spaces, and that references to character
+entities (i.e. &#n ; ) and
+general entities
+(i.e. &name ; ) are expanded;
+if necessary, expansion is performed recursively. In well-formedness mode, there is no DTD which could declare an
+attribute. Because of this, every occurring attribute is considered as a CDATA
+attribute.
required_string_attribute n : returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is convenient
+if you expect a non-implied and non-list attribute value.
optional_string_attribute n : returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method returns None. - This method is
+convenient if you expect a non-list attribute value including the implied
+value.
required_list_attribute n : returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is
+convenient if you expect a list attribute value.
optional_list_attribute n : returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, an empty list will be returned. - This method
+is convenient if you expect a list attribute value or the implied value.
attribute_names : returns the list of all attribute names of
+this element. As this is a validating parser, this list is equal to the
+list of declared attributes.
attribute_type n : returns the type of the attribute called
+n . See the module Pxp_types for a
+description of the encoding of the types.
attributes : returns the list of pairs of names and values
+for all attributes of
+this element.
id_attribute_name : returns the name of the attribute that is
+declared with type ID. There is at most one such attribute. The method raises
+Not_found if there is no declared ID attribute for the
+element type.
id_attribute_value : returns the value of the attribute that
+is declared with type ID. There is at most one such attribute. The method raises
+Not_found if there is no declared ID attribute for the
+element type.
idref_attribute_names : returns the list of attribute names
+that are declared as IDREF or IDREFS.
+
Modifying methods
+ . The following methods are only defined for element nodes (more exactly:
+the methods are defined for data nodes, too, but always fail).
+
+
add_node sn : Adds sub node sn to the list
+of children. This operation is illustrated in the picture
+A node can only be added if it is a root . This method expects that
+sn is a root, and it requires that sn and
+the current object share the same DTD.
Because add_node is the method the parser itself uses
+to add new nodes to the tree, it performs by default some simple validation
+checks: If the content model is a regular expression, it is not allowed to add
+data nodes to this node unless the new nodes consist only of whitespace. In
+this case, the new data nodes are silently dropped (you can change this by
+invoking keep_always_whitespace_mode ).
If the document is flagged as stand-alone, these data nodes only
+containing whitespace are even forbidden if the element declaration is
+contained in an external entity. This case is detected and rejected.
If the content model is EMPTY , it is not allowed to
+add any data node unless the data node is empty. In this case, the new data
+node is silently dropped.
These checks only apply if there is a DTD. In well-formedness mode, it is
+assumed that every element is declared with content model
+ANY which prohibits any validation check. Furthermore, you
+can turn these checks off by passing ~force:true as first
+argument.
add_pinstr pi : Adds the processing instruction
+pi to the list of processing instructions.
delete : Deletes this node from the tree. After this
+operation, this node is no longer the child of the former father node; and the
+node loses the connection to the father as well. This operation is illustrated
+by the figure A deleted node becomes the root of the subtree .
set_nodes nl : Sets the list of children to
+nl . It is required that every member of nl
+is a root, and that all members and the current object share the same DTD.
+Unlike add_node , no validation checks are performed.
quick_set_attributes atts : sets the attributes of this
+element to atts . It is not checked
+whether atts matches the DTD or not; it is up to the
+caller of this method to ensure this. (This method may be useful to transform
+the attribute values, i.e. apply a mapping to every attribute.)
set_comment text : This method is only applicable to
+T_comment nodes; it sets the comment text contained by such
+nodes.
Cloning methods
+ .
orphaned_clone : Returns a clone of the node and the complete
+tree below this node (deep clone). The clone does not have a parent (i.e. the
+reference to the parent node is not cloned). While
+copying the subtree, strings are skipped; it is likely that the original tree
+and the copy tree share strings. Extension objects are cloned by invoking
+the clone method on the original objects; how much of
+the extension objects is cloned depends on the implementation of this method.
This operation is illustrated by the figure
+The clone of a subtree .
orphaned_flat_clone : Returns a clone of the node,
+but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.
+create_element dtd nt al : Returns a flat copy of this node
+(which must be an element) with the following modifications: The DTD is set to
+dtd ; the node type is set to nt , and the
+new attribute list is set to al (given as list of
+(name,value) pairs). The copy does not have children nor a parent. It does not
+contain processing instructions. See
+the example below .
Note that you can specify the position of the new node
+by the optional argument ~position .
+create_data dtd cdata : Returns a flat copy of this node
+(which must be a data node) with the following modifications: The DTD is set to
+dtd ; the node type is set to T_data ; the
+attribute list is empty (data nodes never have attributes); the list of
+children and PIs is empty, too (same reason). The new node does not have a
+parent. The value cdata is the new character content of the
+node. See
+the example below .
keep_always_whitespace_mode : Even data nodes which are
+normally dropped because they only contain ignorable whitespace, can be added to
+this node once this mode is turned on. (This mode is useful to produce
+canonical XML.)
Validating methods
+ . There is one method which locally validates the node, i.e. checks whether the
+subnodes match the content model of this node.
+
+
This class is an implementation of node which
+realizes element nodes:
+
+
class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
Constructor. You can create a new instance by
+
+
new element_impl extension_object
+
+which creates a special form of empty element which already contains a
+reference to the
extension_object , but is
+otherwise empty. This special form is called an
+
exemplar . The purpose of exemplars is that they serve as
+patterns that can be duplicated and filled with data. The method
+
create_element is designed to perform this action.
Example. First, create an exemplar by
+
+
let exemplar_ext = ... in
+let exemplar = new element_impl exemplar_ext in
+
+The
exemplar is not used in node trees, but only as
+a pattern when the element nodes are created:
+
+
let element = exemplar # create_element dtd (T_element name) attlist
+
+The
element is a copy of
exemplar
+(even the extension
exemplar_ext has been copied)
+which ensures that
element and its extension are objects
+of the same class as the exemplars; note that you need not pass a
+class name or other meta information. The copy is initially connected
+with the
dtd , it gets a node type, and the attribute list
+is filled. The
element is now fully functional; it can
+be added to another element as child, and it can contain references to
+subnodes.
This class is an implementation of node which
+should be used for all character data nodes:
+
+
class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
Constructor. You can create a new instance by
+
+
new data_impl extension_object
+
+which creates an empty exemplar node which is connected to
+
extension_object . The node does not contain a
+reference to any DTD, and because of this it cannot be added to node trees.
To get a fully working data node, apply the method
+create_data to the exemplar (see example).
Example. First, create an exemplar by
+
+
let exemplar_ext = ... in
+let exemplar = new data_impl exemplar_ext in
+
+The
exemplar is not used in node trees, but only as
+a pattern when the data nodes are created:
+
+
let data_node = exemplar # create_data dtd "The characters contained in the data node"
+
+The
data_node is a copy of
exemplar .
+The copy is initially connected
+with the
dtd , and it is filled with character material.
+The
data_node is now fully functional; it can
+be added to an element as child.
The type spec defines a way to handle the details of
+creating nodes from exemplars.
+
+
type 'ext spec
+constraint 'ext = 'ext node #extension
+
+val make_spec_from_mapping :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_mapping: (string, 'ext node) Hashtbl.t ->
+ unit ->
+ 'ext spec
+
+val make_spec_from_alist :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_alist : (string * 'ext node) list ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_alist: (string * 'ext node) list ->
+ unit ->
+ 'ext spec
+
+The two functions
make_spec_from_mapping and
+
make_spec_from_alist create
spec
+values. Both functions are functionally equivalent and the only difference is
+that the first function prefers hashtables and the latter associative lists to
+describe mappings from names to exemplars.
You can specify exemplars for the various kinds of nodes that need to be
+generated when an XML document is parsed:
+
+
~super_root_exemplar : This exemplar
+is used to create the super root. This special node is only created if the
+corresponding configuration option has been selected; it is the parent node of
+the root node which may be convenient if every working node must have a parent.
~comment_exemplar : This exemplar is
+used when a comment node must be created. Note that such nodes are only created
+if the corresponding configuration option is "on".
~default_pinstr_exemplar : If a node
+for a processing instruction must be created, and the instruction is not listed
+in the table passed by ~pinstr_mapping or
+~pinstr_alist , this exemplar is used.
+Again the configuration option must be "on" in order to create such nodes at
+all.
~pinstr_mapping or
+~pinstr_alist : Map the target names of processing
+instructions to exemplars. These mappings are only used when nodes for
+processing instructions are created.
~data_exemplar : The exemplar for
+ordinary data nodes.
~default_element_exemplar : This
+exemplar is used if an element node must be created, but the element type
+cannot be found in the tables element_mapping or
+element_alist .
~element_mapping or
+~element_alist : Map the element types to exemplars. These
+mappings are used to create element nodes.
+
+In most cases, you only want to create
spec values to pass
+them to the parser functions found in
Pxp_yacc . However, it
+might be useful to apply
spec values directly.
The following functions create various types of nodes by selecting the
+corresponding exemplar from the passed spec value, and by
+calling create_element or create_data on
+the exemplar.
+
+
val create_data_node :
+ 'ext spec ->
+ dtd ->
+ (* data material: *) string ->
+ 'ext node
+
+val create_element_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* element type: *) string ->
+ (* attributes: *) (string * string) list ->
+ 'ext node
+
+val create_super_root_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ 'ext node
+
+val create_comment_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* comment text: *) string ->
+ 'ext node
+
+val create_pinstr_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ proc_instruction ->
+ 'ext node Building trees. Here is the piece of code that creates the tree of
+the figure A tree with element nodes, data nodes, and attributes . The extension
+object and the DTD are beyond the scope of this example.
+
+
let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar = new data_impl exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;
+
+Alternatively, the last block of statements could also be written as:
+
+
a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];
+
+The root of the tree is
a1 , i.e. it is true that
+
+
x # root == a1
+
+for every x from {
a1 ,
a2 ,
+
b1 ,
c1 ,
cherries ,
+
orange }.
Furthermore, the following properties hold:
+
+
a1 # attribute "att" = Value "apple"
+& a2 # attribute "att" = Value "orange"
+
+& cherries # data = "Cherries"
+& orange # data = "An orange"
+& a1 # data = "An orangeCherries"
+
+& a1 # node_type = T_element "a"
+& a2 # node_type = T_element "a"
+& b1 # node_type = T_element "b"
+& c1 # node_type = T_element "c"
+& cherries # node_type = T_data
+& orange # node_type = T_data
+
+& a1 # sub_nodes = [ b1; c1 ]
+& a2 # sub_nodes = [ orange ]
+& b1 # sub_nodes = [ a2; cherries ]
+& c1 # sub_nodes = []
+& cherries # sub_nodes = []
+& orange # sub_nodes = []
+
+& a2 # parent == b1
+& b1 # parent == a1
+& c1 # parent == a1
+& cherries # parent == b1
+& orange # parent == a2 Searching nodes. The following function searches all nodes of a tree
+for which a certain condition holds:
+
+
let rec search p t =
+ if p t then
+ t :: search_list p (t # sub_nodes)
+ else
+ search_list p (t # sub_nodes)
+
+and search_list p l =
+ match l with
+ [] -> []
+ | t :: l' -> (search p t) @ (search_list p l')
+;; For example, if you want to search all elements of a certain
+type et , the function search can be
+applied as follows:
+
+
let search_element_type et t =
+ search (fun x -> x # node_type = T_element et) t
+;; Getting attribute values. Suppose we have the declaration:
+
+
<!ATTLIST e a CDATA #REQUIRED
+ b CDATA #IMPLIED
+ c CDATA "12345">
+
+In this case, every element
e must have an attribute
+
a , otherwise the parser would indicate an error. If
+the O'Caml variable
n holds the node of the tree
+corresponding to the element, you can get the value of the attribute
+
a by
+
+
let value_of_a = n # required_string_attribute "a"
+
+which is more or less an abbreviation for
+
+
let value_of_a =
+ match n # attribute "a" with
+ Value s -> s
+ | _ -> assert false
+
+- as the attribute is required, the
attribute method always
+returns a
Value .
In contrast to this, the attribute b can be
+omitted. In this case, the method required_string_attribute
+works only if the attribute is there, and the method will fail if the attribute
+is missing. To get the value, you can apply the method
+optional_string_attribute :
+
+
let value_of_b = n # optional_string_attribute "b"
+
+Now,
value_of_b is of type
string option ,
+and
None represents the omitted attribute. Alternatively,
+you could also use
attribute :
+
+
let value_of_b =
+ match n # attribute "b" with
+ Value s -> Some s
+ | Implied_value -> None
+ | _ -> assert false The attribute c behaves much like
+a , because it always has a value. If the attribute is
+omitted, the default, here "12345", will be returned instead. Because of this,
+you can again use required_string_attribute to get the
+value.
The type CDATA is the most general string
+type. The types NMTOKEN , ID ,
+IDREF , ENTITY , and all enumerators and
+notations are special forms of string types that restrict the possible
+values. From O'Caml, they behave like CDATA , i.e. you can
+use the methods required_string_attribute and
+optional_string_attribute , too.
In contrast to this, the types NMTOKENS ,
+IDREFS , and ENTITIES mean lists of
+strings. Suppose we have the declaration:
+
+
<!ATTLIST f d NMTOKENS #REQUIRED
+ e NMTOKENS #IMPLIED>
+
+The type
NMTOKENS stands for lists of space-separated
+tokens; for example the value
"1 abc 23ef" means the list
+
["1"; "abc"; "23ef"] . (Again,
IDREFS
+and
ENTITIES have more restricted values.) To get the
+value of attribute
d , one can use
+
+
let value_of_d = n # required_list_attribute "d"
+
+or
+
+
let value_of_d =
+ match n # attribute "d" with
+ Valuelist l -> l
+ | _ -> assert false
+
+As
d is required, the attribute cannot be omitted, and
+the
attribute method always returns a
+
Valuelist .
For optional attributes like e , apply
+
+
let value_of_e = n # optional_list_attribute "e"
+
+or
+
+
let value_of_e =
+ match n # attribute "e" with
+ Valuelist l -> l
+ | Implied_value -> []
+ | _ -> assert false
+
+Here, the case that the attribute is missing counts as the empty list.
There are also several iterators in Pxp_document; please see
+the mli file for details. You can find examples for them in the
+"simple_transformation" directory.
+
+
val find : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node
+
+val find_all : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node list
+
+val find_element : ?deeply:bool ->
+ string -> 'ext node -> 'ext node
+
+val find_all_elements : ?deeply:bool ->
+ string -> 'ext node -> 'ext node list
+
+exception Skip
+val map_tree : pre:('exta node -> 'extb node) ->
+ ?post:('extb node -> 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+
+val map_tree_sibl :
+ pre: ('exta node option -> 'exta node -> 'exta node option ->
+ 'extb node) ->
+ ?post:('extb node option -> 'extb node -> 'extb node option ->
+ 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+val iter_tree : ?pre:('ext node -> unit) ->
+ ?post:('ext node -> unit) ->
+ 'ext node ->
+ unit
+
+val iter_tree_sibl :
+ ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ 'ext node ->
+ unit Prev Home Next The objects representing the document Up The class type extension
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps b/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps
new file mode 100644
index 000000000..3a98c7964
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps
@@ -0,0 +1,8866 @@
+%!PS-Adobe-2.0
+%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software
+%%Pages: 96
+%%PageOrder: Ascend
+%%BoundingBox: 0 0 596 842
+%%DocumentFonts: Helvetica-Bold Times-Roman Times-Bold Times-Italic
+%%+ Courier Courier-Oblique Helvetica-BoldOblique Courier-Bold
+%%DocumentPaperSizes: a4
+%%EndComments
+%DVIPSWebPage: (www.radicaleye.com)
+%DVIPSCommandLine: dvips -f
+%DVIPSParameters: dpi=600, compressed
+%DVIPSSource: TeX output 2000.08.30:1757
+%%BeginProcSet: texc.pro
+%!
+/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S
+N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72
+mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0
+0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{
+landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize
+mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[
+matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round
+exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{
+statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0]
+N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin
+/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array
+/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2
+array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N
+df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A
+definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get
+}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub}
+B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr
+1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3
+1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx
+0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx
+sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{
+rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp
+gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B
+/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{
+/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{
+A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy
+get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse}
+ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp
+fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17
+{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add
+chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{
+1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop}
+forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn
+/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put
+}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{
+bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A
+mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{
+SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{
+userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X
+1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4
+index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N
+/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{
+/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT)
+(LaserWriter 16/600)]{A length product length le{A length product exch 0
+exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse
+end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask
+grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot}
+imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round
+exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto
+fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p
+delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M}
+B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{
+p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S
+rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end
+
+%%EndProcSet
+%%BeginProcSet: 8r.enc
+% @@psencodingfile@{
+% author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry",
+% version = "0.6",
+% date = "1 July 1998",
+% filename = "8r.enc",
+% email = "tex-fonts@@tug.org",
+% docstring = "Encoding for TrueType or Type 1 fonts
+% to be used with TeX."
+% @}
+%
+% Idea is to have all the characters normally included in Type 1 fonts
+% available for typesetting. This is effectively the characters in Adobe
+% Standard Encoding + ISO Latin 1 + extra characters from Lucida.
+%
+% Character code assignments were made as follows:
+%
+% (1) the Windows ANSI characters are almost all in their Windows ANSI
+% positions, because some Windows users cannot easily reencode the
+% fonts, and it makes no difference on other systems. The only Windows
+% ANSI characters not available are those that make no sense for
+% typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen
+% (173). quotesingle and grave are moved just because it's such an
+% irritation not having them in TeX positions.
+%
+% (2) Remaining characters are assigned arbitrarily to the lower part
+% of the range, avoiding 0, 10 and 13 in case we meet dumb software.
+%
+% (3) Y&Y Lucida Bright includes some extra text characters; in the
+% hopes that other PostScript fonts, perhaps created for public
+% consumption, will include them, they are included starting at 0x12.
+%
+% (4) Remaining positions left undefined are for use in (hopefully)
+% upward-compatible revisions, if someday more characters are generally
+% available.
+%
+% (5) hyphen appears twice for compatibility with both
+% ASCII and Windows.
+%
+/TeXBase1Encoding [
+% 0x00 (encoded characters from Adobe Standard not in Windows 3.1)
+ /.notdef /dotaccent /fi /fl
+ /fraction /hungarumlaut /Lslash /lslash
+ /ogonek /ring /.notdef
+ /breve /minus /.notdef
+% These are the only two remaining unencoded characters, so may as
+% well include them.
+ /Zcaron /zcaron
+% 0x10
+ /caron /dotlessi
+% (unusual TeX characters available in, e.g., Lucida Bright)
+ /dotlessj /ff /ffi /ffl
+ /.notdef /.notdef /.notdef /.notdef
+ /.notdef /.notdef /.notdef /.notdef
+ % very contentious; it's so painful not having quoteleft and quoteright
+ % at 96 and 145 that we move the things normally found there to here.
+ /grave /quotesingle
+% 0x20 (ASCII begins)
+ /space /exclam /quotedbl /numbersign
+ /dollar /percent /ampersand /quoteright
+ /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash
+% 0x30
+ /zero /one /two /three /four /five /six /seven
+ /eight /nine /colon /semicolon /less /equal /greater /question
+% 0x40
+ /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O
+% 0x50
+ /P /Q /R /S /T /U /V /W
+ /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
+% 0x60
+ /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o
+% 0x70
+ /p /q /r /s /t /u /v /w
+ /x /y /z /braceleft /bar /braceright /asciitilde
+ /.notdef % rubout; ASCII ends
+% 0x80
+ /.notdef /.notdef /quotesinglbase /florin
+ /quotedblbase /ellipsis /dagger /daggerdbl
+ /circumflex /perthousand /Scaron /guilsinglleft
+ /OE /.notdef /.notdef /.notdef
+% 0x90
+ /.notdef /.notdef /.notdef /quotedblleft
+ /quotedblright /bullet /endash /emdash
+ /tilde /trademark /scaron /guilsinglright
+ /oe /.notdef /.notdef /Ydieresis
+% 0xA0
+ /.notdef % nobreakspace
+ /exclamdown /cent /sterling
+ /currency /yen /brokenbar /section
+ /dieresis /copyright /ordfeminine /guillemotleft
+ /logicalnot
+ /hyphen % Y&Y (also at 45); Windows' softhyphen
+ /registered
+ /macron
+% 0xB0
+ /degree /plusminus /twosuperior /threesuperior
+ /acute /mu /paragraph /periodcentered
+ /cedilla /onesuperior /ordmasculine /guillemotright
+ /onequarter /onehalf /threequarters /questiondown
+% 0xC0
+ /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
+ /Egrave /Eacute /Ecircumflex /Edieresis
+ /Igrave /Iacute /Icircumflex /Idieresis
+% 0xD0
+ /Eth /Ntilde /Ograve /Oacute
+ /Ocircumflex /Otilde /Odieresis /multiply
+ /Oslash /Ugrave /Uacute /Ucircumflex
+ /Udieresis /Yacute /Thorn /germandbls
+% 0xE0
+ /agrave /aacute /acircumflex /atilde
+ /adieresis /aring /ae /ccedilla
+ /egrave /eacute /ecircumflex /edieresis
+ /igrave /iacute /icircumflex /idieresis
+% 0xF0
+ /eth /ntilde /ograve /oacute
+ /ocircumflex /otilde /odieresis /divide
+ /oslash /ugrave /uacute /ucircumflex
+ /udieresis /yacute /thorn /ydieresis
+] def
+
+%%EndProcSet
+%%BeginProcSet: texps.pro
+%!
+TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2
+index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll
+exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics
+exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub
+dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def}
+ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict
+end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{
+dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1
+roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def
+dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def}
+if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def}
+def end
+
+%%EndProcSet
+%%BeginProcSet: special.pro
+%!
+TeXDict begin/SDict 200 dict N SDict begin/@SpecialDefaults{/hs 612 N
+/vs 792 N/ho 0 N/vo 0 N/hsc 1 N/vsc 1 N/ang 0 N/CLIP 0 N/rwiSeen false N
+/rhiSeen false N/letter{}N/note{}N/a4{}N/legal{}N}B/@scaleunit 100 N
+/@hscale{@scaleunit div/hsc X}B/@vscale{@scaleunit div/vsc X}B/@hsize{
+/hs X/CLIP 1 N}B/@vsize{/vs X/CLIP 1 N}B/@clip{/CLIP 2 N}B/@hoffset{/ho
+X}B/@voffset{/vo X}B/@angle{/ang X}B/@rwi{10 div/rwi X/rwiSeen true N}B
+/@rhi{10 div/rhi X/rhiSeen true N}B/@llx{/llx X}B/@lly{/lly X}B/@urx{
+/urx X}B/@ury{/ury X}B/magscale true def end/@MacSetUp{userdict/md known
+{userdict/md get type/dicttype eq{userdict begin md length 10 add md
+maxlength ge{/md md dup length 20 add dict copy def}if end md begin
+/letter{}N/note{}N/legal{}N/od{txpose 1 0 mtx defaultmatrix dtransform S
+atan/pa X newpath clippath mark{transform{itransform moveto}}{transform{
+itransform lineto}}{6 -2 roll transform 6 -2 roll transform 6 -2 roll
+transform{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll
+curveto}}{{closepath}}pathforall newpath counttomark array astore/gc xdf
+pop ct 39 0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack}
+if}N/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1
+-1 scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3
+get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip
+yflip not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub
+neg 0 TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{
+noflips{TR pop pop 270 rotate 1 -1 scale}if xflip yflip and{TR pop pop
+90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get
+neg sub neg TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr
+1 get neg sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr
+2 get ppr 0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4
+-1 roll add 2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S
+TR}if}N/cp{pop pop showpage pm restore}N end}if}if}N/normalscale{
+Resolution 72 div VResolution 72 div neg scale magscale{DVImag dup scale
+}if 0 setgray}N/psfts{S 65781.76 div N}N/startTexFig{/psf$SavedState
+save N userdict maxlength dict begin/magscale true def normalscale
+currentpoint TR/psf$ury psfts/psf$urx psfts/psf$lly psfts/psf$llx psfts
+/psf$y psfts/psf$x psfts currentpoint/psf$cy X/psf$cx X/psf$sx psf$x
+psf$urx psf$llx sub div N/psf$sy psf$y psf$ury psf$lly sub div N psf$sx
+psf$sy scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub
+TR/showpage{}N/erasepage{}N/copypage{}N/p 3 def @MacSetUp}N/doclip{
+psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2
+roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath
+moveto}N/endTexFig{end psf$SavedState restore}N/@beginspecial{SDict
+begin/SpecialSave save N gsave normalscale currentpoint TR
+@SpecialDefaults count/ocount X/dcount countdictstack N}N/@setspecial{
+CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto
+closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx
+sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR
+}{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse
+CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury
+lineto closepath clip}if/showpage{}N/erasepage{}N/copypage{}N newpath}N
+/@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{end}
+repeat grestore SpecialSave restore end}N/@defspecial{SDict begin}N
+/@fedspecial{end}B/li{lineto}B/rl{rlineto}B/rc{rcurveto}B/np{/SaveX
+currentpoint/SaveY X N 1 setlinecap newpath}N/st{stroke SaveX SaveY
+moveto}N/fil{fill SaveX SaveY moveto}N/ellipse{/endangle X/startangle X
+/yrad X/xrad X/savematrix matrix currentmatrix N TR xrad yrad scale 0 0
+1 startangle endangle arc savematrix setmatrix}N end
+
+%%EndProcSet
+%%BeginProcSet: color.pro
+%!
+TeXDict begin/setcmykcolor where{pop}{/setcmykcolor{dup 10 eq{pop
+setrgbcolor}{1 sub 4 1 roll 3{3 index add neg dup 0 lt{pop 0}if 3 1 roll
+}repeat setrgbcolor pop}ifelse}B}ifelse/TeXcolorcmyk{setcmykcolor}def
+/TeXcolorrgb{setrgbcolor}def/TeXcolorgrey{setgray}def/TeXcolorgray{
+setgray}def/TeXcolorhsb{sethsbcolor}def/currentcmykcolor where{pop}{
+/currentcmykcolor{currentrgbcolor 10}B}ifelse/DC{exch dup userdict exch
+known{pop pop}{X}ifelse}B/GreenYellow{0.15 0 0.69 0 setcmykcolor}DC
+/Yellow{0 0 1 0 setcmykcolor}DC/Goldenrod{0 0.10 0.84 0 setcmykcolor}DC
+/Dandelion{0 0.29 0.84 0 setcmykcolor}DC/Apricot{0 0.32 0.52 0
+setcmykcolor}DC/Peach{0 0.50 0.70 0 setcmykcolor}DC/Melon{0 0.46 0.50 0
+setcmykcolor}DC/YellowOrange{0 0.42 1 0 setcmykcolor}DC/Orange{0 0.61
+0.87 0 setcmykcolor}DC/BurntOrange{0 0.51 1 0 setcmykcolor}DC
+/Bittersweet{0 0.75 1 0.24 setcmykcolor}DC/RedOrange{0 0.77 0.87 0
+setcmykcolor}DC/Mahogany{0 0.85 0.87 0.35 setcmykcolor}DC/Maroon{0 0.87
+0.68 0.32 setcmykcolor}DC/BrickRed{0 0.89 0.94 0.28 setcmykcolor}DC/Red{
+0 1 1 0 setcmykcolor}DC/OrangeRed{0 1 0.50 0 setcmykcolor}DC/RubineRed{
+0 1 0.13 0 setcmykcolor}DC/WildStrawberry{0 0.96 0.39 0 setcmykcolor}DC
+/Salmon{0 0.53 0.38 0 setcmykcolor}DC/CarnationPink{0 0.63 0 0
+setcmykcolor}DC/Magenta{0 1 0 0 setcmykcolor}DC/VioletRed{0 0.81 0 0
+setcmykcolor}DC/Rhodamine{0 0.82 0 0 setcmykcolor}DC/Mulberry{0.34 0.90
+0 0.02 setcmykcolor}DC/RedViolet{0.07 0.90 0 0.34 setcmykcolor}DC
+/Fuchsia{0.47 0.91 0 0.08 setcmykcolor}DC/Lavender{0 0.48 0 0
+setcmykcolor}DC/Thistle{0.12 0.59 0 0 setcmykcolor}DC/Orchid{0.32 0.64 0
+0 setcmykcolor}DC/DarkOrchid{0.40 0.80 0.20 0 setcmykcolor}DC/Purple{
+0.45 0.86 0 0 setcmykcolor}DC/Plum{0.50 1 0 0 setcmykcolor}DC/Violet{
+0.79 0.88 0 0 setcmykcolor}DC/RoyalPurple{0.75 0.90 0 0 setcmykcolor}DC
+/BlueViolet{0.86 0.91 0 0.04 setcmykcolor}DC/Periwinkle{0.57 0.55 0 0
+setcmykcolor}DC/CadetBlue{0.62 0.57 0.23 0 setcmykcolor}DC
+/CornflowerBlue{0.65 0.13 0 0 setcmykcolor}DC/MidnightBlue{0.98 0.13 0
+0.43 setcmykcolor}DC/NavyBlue{0.94 0.54 0 0 setcmykcolor}DC/RoyalBlue{1
+0.50 0 0 setcmykcolor}DC/Blue{1 1 0 0 setcmykcolor}DC/Cerulean{0.94 0.11
+0 0 setcmykcolor}DC/Cyan{1 0 0 0 setcmykcolor}DC/ProcessBlue{0.96 0 0 0
+setcmykcolor}DC/SkyBlue{0.62 0 0.12 0 setcmykcolor}DC/Turquoise{0.85 0
+0.20 0 setcmykcolor}DC/TealBlue{0.86 0 0.34 0.02 setcmykcolor}DC
+/Aquamarine{0.82 0 0.30 0 setcmykcolor}DC/BlueGreen{0.85 0 0.33 0
+setcmykcolor}DC/Emerald{1 0 0.50 0 setcmykcolor}DC/JungleGreen{0.99 0
+0.52 0 setcmykcolor}DC/SeaGreen{0.69 0 0.50 0 setcmykcolor}DC/Green{1 0
+1 0 setcmykcolor}DC/ForestGreen{0.91 0 0.88 0.12 setcmykcolor}DC
+/PineGreen{0.92 0 0.59 0.25 setcmykcolor}DC/LimeGreen{0.50 0 1 0
+setcmykcolor}DC/YellowGreen{0.44 0 0.74 0 setcmykcolor}DC/SpringGreen{
+0.26 0 0.76 0 setcmykcolor}DC/OliveGreen{0.64 0 0.95 0.40 setcmykcolor}
+DC/RawSienna{0 0.72 1 0.45 setcmykcolor}DC/Sepia{0 0.83 1 0.70
+setcmykcolor}DC/Brown{0 0.81 1 0.60 setcmykcolor}DC/Tan{0.14 0.42 0.56 0
+setcmykcolor}DC/Gray{0 0 0 0.50 setcmykcolor}DC/Black{0 0 0 1
+setcmykcolor}DC/White{0 0 0 0 setcmykcolor}DC end
+
+%%EndProcSet
+TeXDict begin 39158280 55380996 1000 600 600 () @start
+/Fa 106[21 149[{TeXBase1Encoding ReEncodeFont}1 59.7758
+/Times-Roman rf /Fb 135[77 2[77 77 77 3[77 77 77 3[77
+3[77 77 77 99[{TeXBase1Encoding ReEncodeFont}11 129.116
+/Courier-Bold rf /Fc 134[65 65 2[65 65 65 65 1[65 65
+65 65 65 2[65 65 65 65 65 65 65 65 65 1[65 36[65 6[65
+65 65 49[{TeXBase1Encoding ReEncodeFont}25 107.597 /Courier-Bold
+rf /Fd 141[56 4[128 7[80 88 2[80 97[{TeXBase1Encoding ReEncodeFont}5
+143.462 /Helvetica-BoldOblique rf /Fe 147[21 4[37 1[33
+3[37 23[25 14[25 58[{TeXBase1Encoding ReEncodeFont}6
+74.7198 /Times-Italic rf /Ff 204[25 25 25 49[{
+TeXBase1Encoding ReEncodeFont}3 49.8132 /Times-Roman
+rf
+%DVIPSBitmapFont: Fg cmmi8 8 2
+/Fg 2 63 df60
+D<12E012F812FEEA3F80EA0FE0EA03F8EA00FEEB3F80EB0FE0EB03F8EB00FC143FEC0FC0
+EC07F0EC01FCEC007FED1FC0ED07F0ED01FCED007FEE1FC01607161FEE7F00ED01FCED07
+F0ED1FC0037FC7FCEC01FCEC07F0EC0FC0023FC8FC14FCEB03F8EB0FE0EB3F8001FEC9FC
+EA03F8EA0FE0EA3F8000FECAFC12F812E02A2B7AA537>62 D E
+%EndDVIPSBitmapFont
+/Fh 131[40 1[40 40 40 40 40 40 40 40 40 40 40 40 40 40
+40 40 1[40 40 40 1[40 40 40 40 40 1[40 5[40 3[40 40 40
+40 40 40 40 40 40 40 40 1[40 40 40 1[40 40 40 40 40 1[40
+40 40 40 40 40 1[40 4[40 1[40 1[40 40 40 40 40 40 40
+40 40 40 40 1[40 40 40 33[{TeXBase1Encoding ReEncodeFont}69
+67.2479 /Courier rf /Fi 105[37 28[37 37 54 37 37 21 29
+25 37 37 37 37 58 21 37 1[21 37 37 25 33 37 33 37 33
+7[54 54 3[46 5[54 66 46 2[25 2[42 2[50 50 54 5[21 21
+11[19 1[19 2[25 25 25 4[30 31[42 2[{TeXBase1Encoding ReEncodeFont}45
+74.7198 /Times-Roman rf /Fj 135[55 7[61 2[89 28 6[55
+3[55 27[66 69[{TeXBase1Encoding ReEncodeFont}7 99.6264
+/Helvetica-Bold rf /Fk 145[27 2[27 57[27 49[{
+TeXBase1Encoding ReEncodeFont}3 44.8318 /Courier-Oblique
+rf /Fl 135[50 3[50 50 3[50 50 3[50 50 3[50 1[50 50 2[50
+95[{TeXBase1Encoding ReEncodeFont}11 83.022 /Courier-Oblique
+rf
+%DVIPSBitmapFont: Fm cmmi10 10 2
+/Fm 2 63 df60
+D<126012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE
+01FF9338007F80EF1FC0A2EF7F80933801FF00EE07FCEE1FF0EE7FC04B48C7FCED07FCED
+1FF0ED7FC04A48C8FCEC07FCEC1FF0EC7FC04948C9FCEB07FCEB1FF0EB7FC04848CAFCEA
+07FCEA3FF0EA7FC048CBFC12FC1270323279AD41>62 D E
+%EndDVIPSBitmapFont
+/Fn 134[45 45 1[45 45 45 45 45 1[45 45 45 45 45 1[45
+45 45 45 45 45 45 45 45 45 1[45 5[45 2[45 8[45 5[45 2[45
+45 1[45 19[45 45 44[{TeXBase1Encoding ReEncodeFont}32
+74.7198 /Courier-Oblique rf
+%DVIPSBitmapFont: Fo cmmi9 9 2
+/Fo 2 63 df<171C177EEE01FEEE07FCEE1FF0EE7FC0923801FF00ED07FCED1FF0ED7FC0
+4A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07FCEA1FF0
+EA7FC048CAFCA2EA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE
+01FEEE007E171C2F2E7AA93C>60 D<127012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007F
+C0EB1FF0EB07FCEB01FF9038007FC0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED
+01FF9238007FC0EE1FF0EE07FCEE01FEA2EE07FCEE1FF0EE7FC0923801FF00ED07FCED1F
+F0ED7FC04A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07
+FCEA1FF0EA7FC048CAFC12FC12702F2E7AA93C>62 D E
+%EndDVIPSBitmapFont
+/Fp 134[66 66 93 66 73 40 66 47 1[73 73 73 106 33 2[33
+73 73 40 66 73 66 73 66 8[80 113 80 86 73 80 86 1[80
+1[86 100 73 2[33 86 1[73 80 86 86 1[86 1[73 5[66 66 66
+66 66 66 66 66 66 66 1[33 40 33 2[40 40 5[57 31[73 2[{
+TeXBase1Encoding ReEncodeFont}58 119.552 /Helvetica-Bold
+rf /Fq 129[45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 33[{TeXBase1Encoding ReEncodeFont}90
+74.7198 /Courier rf /Fr 134[37 37 55 37 42 23 32 32 1[42
+42 42 60 23 37 23 23 42 42 23 37 42 37 42 42 1[42 6[51
+69 1[60 46 42 2[51 1[55 69 46 2[28 3[51 60 55 1[51 1[42
+4[28 42 42 42 42 42 42 42 42 42 42 1[21 28 21 2[28 28
+6[28 30[42 2[{TeXBase1Encoding ReEncodeFont}58 83.022
+/Times-Italic rf /Fs 138[105 57 96 67 1[105 105 105 153
+48 1[48 48 105 105 57 96 105 96 105 96 8[115 163 1[124
+105 3[115 2[143 105 5[105 2[124 3[105 10[96 96 96 96
+2[48 43[105 2[{TeXBase1Encoding ReEncodeFont}35 172.154
+/Helvetica-Bold rf /Ft 106[23 29 29 25[33 33 48 33 33
+18 26 22 1[33 33 33 52 18 33 18 18 33 33 22 29 33 29
+33 29 8[48 3[41 37 2[37 6[22 1[48 12[18 10[18 17 1[17
+2[22 22 5[27 31[37 2[{TeXBase1Encoding ReEncodeFont}41
+66.4176 /Times-Roman rf /Fu 134[42 42 60 42 46 28 32
+37 1[46 42 46 69 23 46 1[23 46 42 28 37 46 37 46 42 9[83
+60 60 55 46 60 3[60 78 55 2[32 65 65 51 55 60 60 55 60
+1[42 6[42 1[42 42 42 42 42 42 2[21 28 21 4[28 39[{
+TeXBase1Encoding ReEncodeFont}53 83.022 /Times-Bold rf
+/Fv 27[37 58[63 42[45 40 1[40 37 42 42 60 42 42 23 32
+28 42 42 42 42 65 23 42 23 23 42 42 28 37 42 37 42 37
+28 42 1[28 23 28 1[60 60 78 60 60 51 46 55 60 46 60 60
+74 51 60 1[28 60 60 46 51 60 55 55 60 1[37 47 47 47 23
+23 42 42 42 42 42 42 42 42 42 42 23 21 28 21 2[28 28
+28 65 69 1[42 34 28 29[46 46 2[{TeXBase1Encoding ReEncodeFont}90
+83.022 /Times-Roman rf /Fw 136[65 1[51 1[46 32 2[51 51
+1[23 2[23 51 51 1[46 51 2[46 8[55 3[51 3[55 11[60 9[28
+18[23 39[{TeXBase1Encoding ReEncodeFont}19 83.022 /Helvetica-Bold
+rf /Fx 134[80 80 112 80 88 48 80 56 1[88 88 88 128 40
+80 1[40 88 88 48 80 88 80 88 80 8[96 1[96 104 88 96 104
+2[112 104 120 88 2[40 104 112 1[96 104 104 1[104 6[48
+4[80 80 80 80 80 2[40 48 45[{TeXBase1Encoding ReEncodeFont}48
+143.462 /Helvetica-Bold rf /Fy 138[126 1[115 80 8[57
+126 126 1[115 126 11[138 2[149 126 3[138 6[57 26[57 6[57
+39[{TeXBase1Encoding ReEncodeFont}15 206.584 /Helvetica-Bold
+rf end
+%%EndProlog
+%%BeginSetup
+%%Feature: *Resolution 600dpi
+TeXDict begin
+%%BeginPaperSize: a4
+a4
+%%EndPaperSize
+
+%%EndSetup
+%%Page: 1 1
+1 0 bop Black Black 890 647 a Fy(The)58 b(PXP)f(user')-12
+b(s)58 b(guide)1384 2594 y Fx(Ger)m(d)39 b(Stolpmann)p
+Black Black eop
+%%Page: 2 2
+2 1 bop Black Black -2 579 a Fw(The)22 b(PXP)j(user')-5
+b(s)23 b(guide)-2 687 y Fv(by)d(Gerd)f(Stolpmann)-2 903
+y(Cop)o(yright)f(\251)j(1999,)e(2000)g(by)g(Gerd)h(Stolpmann)-2
+1135 y(PXP)h(is)g(a)g(v)n(alidating)d(parser)i(for)f(XML-1.0)g(which)h
+(has)g(been)g(written)g(entirely)f(in)h(Objecti)n(v)o(e)g(Caml.)-2
+1285 y Fw(Do)o(wnload)h(PXP:)j Fv(The)c(free)g(PXP)h(library)e(can)h
+(be)g(do)n(wnloaded)d(at)k(http://www)-5 b(.ocaml-programming)o(.de)o
+(/pack)o(age)o(s/.)15 b(This)-2 1393 y(user')-5 b(s)20
+b(guide)f(is)j(included.)c(Ne)n(west)j(releases)f(of)g(PXP)h(will)g(be)
+f(announced)e(in)i(The)g(OCaml)g(Link)g(Database)-2 1500
+y(\(http://www)-5 b(.npc.de/ocaml/linkdb)o(/\).)-2 1899
+y Fu(License)-2 2090 y Ft(This)16 b(document,)j(and)e(the)h(described)h
+(softw)o(are,)f("PXP",)e(are)i(cop)o(yright)i(by)d(Gerd)g(Stolpmann.)-2
+2198 y(Permission)h(is)e(hereby)j(granted,)f(free)g(of)f(char)o(ge,)h
+(to)f(an)o(y)h(person)f(obtaining)j(a)d(cop)o(y)h(of)f(this)h(document)
+g(and)g(the)f("PXP")g(softw)o(are)i(\(the)f("Softw)o(are"\),)g(to)f
+(deal)i(in)-2 2306 y(the)f(Softw)o(are)g(without)h(restriction,)g
+(including)h(without)e(limitation)i(the)e(rights)g(to)f(use,)g(cop)o(y)
+l(,)g(modify)l(,)g(mer)o(ge,)g(publish,)h(distrib)o(ute,)h(sublicense,)
+g(and/or)f(sell)-2 2414 y(copies)g(of)f(the)h(Softw)o(are,)g(and)g(to)f
+(permit)h(persons)f(to)h(whom)e(the)i(Softw)o(are)h(is)e(furnished)h
+(to)f(do)g(so,)g(subject)h(to)g(the)f(follo)n(wing)j(conditions:)-2
+2522 y(The)d(abo)o(v)o(e)h(cop)o(yright)h(notice)g(and)f(this)f
+(permission)h(notice)h(shall)f(be)g(included)h(in)e(all)h(copies)h(or)e
+(substantial)i(portions)g(of)e(the)g(Softw)o(are.)-2
+2630 y(The)g(Softw)o(are)h(is)f(pro)o(vided)i(\223as)e(is\224,)g
+(without)i(w)o(arranty)g(of)e(an)o(y)g(kind,)h(e)o(xpress)f(or)g
+(implied,)i(including)g(b)o(ut)e(not)h(limited)h(to)e(the)h(w)o
+(arranties)h(of)e(merchantability)l(,)-2 2737 y(\002tness)g(for)g(a)g
+(particular)j(purpose)e(and)g(noninfringement.)i(In)d(no)g(e)n(v)o(ent)
+h(shall)h(Gerd)e(Stolpmann)h(be)g(liable)h(for)e(an)o(y)g(claim,)h
+(damages)g(or)f(other)h(liability)l(,)i(whether)-2 2845
+y(in)d(an)g(action)i(of)e(contract,)i(tort)f(or)f(otherwise,)i(arising)
+f(from,)e(out)i(of)f(or)g(in)g(connection)j(with)e(the)f(Softw)o(are)i
+(or)e(the)h(use)f(or)g(other)h(dealings)h(in)e(the)h(softw)o(are.)p
+Black Black eop
+%%Page: 3 3
+3 2 bop Black Black -2 621 a Fs(T)-14 b(ab)n(le)48 b(of)g(Contents)396
+815 y Fu(I.)21 b(User')m(s)g(guide)p Black 4 w(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black 4 w(6)596
+943 y Fv(1.)f(What)g(is)h(XML?)p Black 4 w(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black 4 w(7)795 1051
+y(1.1.)e(Introduction)p Black 14 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black 4 w(7)994 1159
+y(1.1.1.)g(The)g("hello)h(w)o(orld")g(e)o(xample)p Black
+13 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black 4 w(7)994 1267 y(1.1.2.)f(XML)h(parsers)g(and)f
+(processors)p Black 3 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black 4 w(9)994 1375 y(1.1.3.)g(Discussion)p
+Black 9 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+4 w(9)795 1483 y(1.2.)g(Highlights)g(of)h(XML)p Black
+10 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(11)994
+1591 y(1.2.1.)f(The)g(DTD)i(and)e(the)i(instance)p Black
+15 w(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(11)994 1699 y(1.2.2.)e(Reserv)o(ed)g(characters)p
+Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(12)994 1807 y(1.2.3.)g(Elements)g(and)h
+(ELEMENT)f(declarations)p Black 7 w(.)p Black Black -2
+w(.)p Black Black(.)p Black Black(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(13)994
+1915 y(1.2.4.)g(Attrib)n(ute)g(lists)j(and)e(A)-9 b(TTLIST)19
+b(declarations)p Black 6 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(15)994 2023 y(1.2.5.)g(P)o(arsed)g(entities)p
+Black 18 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(16)994 2131 y(1.2.6.)g(Notations)g(and)h
+(unparsed)e(entities)p Black 14 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(19)795 2238 y(1.3.)h(A)i(complete)e(e)o(xample:)g
+(The)h Fr(r)m(eadme)f Fv(DTD)p Black 3 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(20)596 2346 y(2.)h(Using)g(PXP)p Black
+6 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(24)795 2454 y(2.1.)f(V)-9 b(alidation)p
+Black 3 w(.)p Black Black -2 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795
+2562 y(2.2.)19 b(Ho)n(w)h(to)g(parse)g(a)h(document)d(from)h(an)h
+(application)p Black 10 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795
+2670 y(2.3.)f(Class-based)h(processing)f(of)h(the)g(node)g(tree)p
+Black 8 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(29)795
+2778 y(2.4.)f(Example:)g(An)h(HTML)g(back)o(end)f(for)g(the)i
+Fr(r)m(eadme)e Fv(DTD)p Black 3 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(33)994 2886 y(2.4.1.)g(Header)p
+Black 9 w(.)p Black Black -2 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(33)994 2994 y(2.4.2.)g(T)-7 b(ype)19
+b(declarations)p Black 14 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(33)994 3102 y(2.4.3.)g(Class)i Fq(store)p Black
+11 w Fv(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(34)994 3210 y(2.4.4.)e(Function)g
+Fq(escape_html)p Black Fv(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(35)994 3318 y(2.4.5.)g(V)-5 b(irtual)20 b(class)h
+Fq(shared)p Black 4 w Fv(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(35)994 3426 y(2.4.6.)e(Class)i
+Fq(only_data)p Black 17 w Fv(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(36)994
+3534 y(2.4.7.)e(Class)i Fq(readme)p Black 8 w Fv(.)p
+Black Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(36)994 3642 y(2.4.8.)e(Classes)i
+Fq(section)p Fv(,)f Fq(sect1)p Fv(,)f Fq(sect2)p Fv(,)h(and)g
+Fq(sect3)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(39)994 3749 y(2.4.9.)f(Classes)i
+Fq(map_tag)p Fv(,)f Fq(p)p Fv(,)g Fq(em)p Fv(,)g Fq(ul)p
+Fv(,)g Fq(li)p Black 16 w Fv(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(39)994
+3857 y(2.4.10.)e(Class)k Fq(br)p Black Fv(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(40)994 3965 y(2.4.11.)c(Class)k
+Fq(code)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(40)994 4073 y(2.4.12.)c(Class)k
+Fq(a)p Black 4 w Fv(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(41)994
+4181 y(2.4.13.)c(Class)k Fq(footnote)p Black 1 w Fv(.)p
+Black Black -2 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(42)994 4289
+y(2.4.14.)c(The)i(speci\002cation)f(of)h(the)g(document)f(model)p
+Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(43)596 4397 y(3.)h(The)f(objects)h
+(representing)e(the)j(document)p Black 4 w(.)p Black
+Black -3 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(46)795 4505 y(3.1.)e(The)h Fq(document)f Fv(class)p
+Black 7 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(46)795 4613 y(3.2.)g(The)h(class)h(type)f
+Fq(node)p Black 2 w Fv(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(47)994 4721 y(3.2.1.)f(The)g(structure)h(of)g(document)e(trees)p
+Black 3 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(49)994
+4829 y(3.2.2.)h(The)g(methods)h(of)f(the)i(class)g(type)f
+Fq(node)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(52)p Black 3842
+5278 a Fr(3)p Black eop
+%%Page: 4 4
+4 3 bop Black Black 994 579 a Fv(3.2.3.)19 b(The)g(class)j
+Fq(element_impl)p Black 2 w Fv(.)p Black Black -3 w(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(56)994 687 y(3.2.4.)d(The)g(class)j Fq(data_impl)p
+Black 12 w Fv(.)p Black Black -2 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(57)994
+795 y(3.2.5.)d(The)g(type)h Fq(spec)p Black 5 w Fv(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(58)994 903 y(3.2.6.)f(Examples)p Black
+5 w(.)p Black Black -3 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(60)994
+1011 y(3.2.7.)g(Iterators)p Black 12 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(64)795 1119 y(3.3.)g(The)h(class)h(type)f Fq(extension)p
+Black 6 w Fv(.)p Black Black -2 w(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(65)994 1226 y(3.3.1.)f(Ho)n(w)h(to)g(de\002ne)
+g(an)g(e)o(xtension)f(class)p Black 13 w(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(66)994 1334
+y(3.3.2.)g(Ho)n(w)h(to)g(bind)f(e)o(xtension)g(classes)i(to)g(element)e
+(types)p Black 10 w(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(68)795 1442 y(3.4.)g(Details)i(of)f(the)g(mapping)e(from)i(XML)g
+(te)o(xt)g(to)g(the)g(tree)h(representation)p Black 13
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(69)994
+1550 y(3.4.1.)e(The)g(representation)g(of)g(character)n(-free)f
+(elements)p Black 9 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(69)994 1658 y(3.4.2.)h(The)g(representation)g(of)g(character)g
+(data)p Black 10 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(70)994 1766
+y(3.4.3.)g(The)g(representation)g(of)g(entities)i(within)f(documents)p
+Black 12 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(70)994 1874 y(3.4.4.)f(The)g(representation)g
+(of)g(attrib)n(utes)p Black 20 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(71)994 1982 y(3.4.5.)g(The)g(representation)g(of)g
+(processing)g(instructions)p Black(.)p Black Black -1
+w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(71)994 2090 y(3.4.6.)g(The)g
+(representation)g(of)g(comments)p Black 7 w(.)p Black
+Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(71)994 2198 y(3.4.7.)g(The)g(attrib)n(utes)i
+Fq(xml:lang)e Fv(and)h Fq(xml:space)p Black 10 w Fv(.)p
+Black Black -2 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(72)994 2306 y(3.4.8.)f(And)g(what)h(about)g(namespaces?)p
+Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(72)596
+2414 y(4.)g(Con\002guring)e(and)h(calling)h(the)g(parser)p
+Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(73)795 2522 y(4.1.)f(Ov)o(ervie)n(w)p
+Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(73)795
+2630 y(4.2.)g(Resolv)o(ers)h(and)g(sources)p Black 2
+w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(75)994 2737
+y(4.2.1.)f(Using)h(the)g(b)n(uilt-in)f(resolv)o(ers)h(\(called)f
+(sources\))p Black 5 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(75)994 2845 y(4.2.2.)g(The)g(resolv)o(er)g(API)p
+Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(76)994 2953 y(4.2.3.)g(Prede\002ned)f(resolv)o(er)h
+(components)p Black 13 w(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(78)795 3061
+y(4.3.)g(The)h(DTD)g(classes)p Black 1 w(.)p Black Black
+1 w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(81)795
+3169 y(4.4.)f(In)m(v)n(oking)f(the)i(parser)p Black 14
+w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(89)994
+3277 y(4.4.1.)f(Def)o(aults)p Black 10 w(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(89)994 3385 y(4.4.2.)g(P)o(arsing)g(functions)p
+Black 4 w(.)p Black Black -3 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(90)994 3493 y(4.4.3.)g(Con\002guration)f(options)p
+Black 19 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(91)994 3601 y(4.4.4.)h(Which)h
+(con\002guration)d(should)i(I)i(use?)p Black 18 w(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(93)795 3709 y(4.5.)e(Updates)p Black 10 w(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(95)p Black 3842 5278 a
+Fr(4)p Black eop
+%%Page: 5 5
+5 4 bop Black Black -2 621 a Fs(List)48 b(of)g(Figures)396
+815 y Fv(3-1.)19 b(A)i(tree)f(with)h(element)e(nodes,)h(data)g(nodes,)f
+(and)g(attrib)n(utes)p Black 18 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(49)396 923 y(3-2.)g(Nodes)h(are)g(doubly)f(link)o
+(ed)g(trees)p Black 15 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(50)396
+1031 y(3-3.)g(A)i(node)e(can)h(only)g(be)g(added)f(if)h(it)h(is)g(a)g
+(root)p Black 5 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(51)396 1139 y(3-4.)e(A)i(deleted)f(node)f
+(becomes)g(the)h(root)g(of)g(the)g(subtree)p Black 3
+w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(51)396 1247 y(3-5.)f(The)h(clone)g(of)g(a)g(subtree)p
+Black 18 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(52)396 1355 y(3-6.)f(The)h(structure)g
+(of)f(nodes)h(and)g(e)o(xtensions)p Black 18 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(65)p Black 3842 5278 a Fr(5)p Black eop
+%%Page: 6 6
+6 5 bop Black Black 1241 647 a Fy(I.)58 b(User')-12 b(s)57
+b(guide)p Black Black eop
+%%Page: 7 7
+7 6 bop Black Black -2 621 a Fs(Chapter)48 b(1.)f(What)h(is)f(XML?)-2
+1055 y Fx(1.1.)39 b(Intr)m(oduction)396 1235 y Fv(XML)20
+b(\(short)g(for)f Fr(Extensible)h(Markup)g(Langua)o(g)o(e)p
+Fv(\))e(generalizes)h(the)h(idea)g(that)g(te)o(xt)g(documents)f(are)h
+(typically)396 1343 y(structured)f(in)h(sections,)g(sub-sections,)f
+(paragraphs,)f(and)i(so)g(on.)g(The)g(format)f(of)h(the)g(document)e
+(is)j(not)f(\002x)o(ed)g(\(as,)396 1451 y(for)g(e)o(xample,)e(in)j
+(HTML\),)e(b)n(ut)h(can)g(be)g(declared)f(by)h(a)h(so-called)e(DTD)i
+(\(document)c(type)j(de\002nition\).)f(The)g(DTD)396
+1559 y(describes)h(only)f(the)i(rules)f(ho)n(w)f(the)i(document)d(can)i
+(be)g(structured,)e(b)n(ut)j(not)e(ho)n(w)h(the)g(document)e(can)i(be)
+396 1667 y(processed.)f(F)o(or)h(e)o(xample,)e(if)j(you)e(w)o(ant)i(to)
+f(publish)f(a)i(book)e(that)h(uses)h(XML)f(markup,)e(you)h(will)i(need)
+f(a)g(processor)396 1775 y(that)h(con)m(v)o(erts)d(the)i(XML)g(\002le)h
+(into)f(a)h(printable)e(format)g(such)h(as)h(Postscript.)f(On)g(the)g
+(one)g(hand,)f(the)h(structure)f(of)396 1883 y(XML)h(documents)f(is)i
+(con\002gurable;)d(on)i(the)g(other)f(hand,)g(there)h(is)h(no)f(longer)
+f(a)h(canonical)f(interpretation)f(of)i(the)396 1991
+y(elements)g(of)g(the)g(document;)f(for)g(e)o(xample)g(one)h(XML)g(DTD)
+g(might)g(w)o(ant)g(that)g(paragraphes)e(are)i(delimited)g(by)396
+2099 y Fq(para)g Fv(tags,)h(and)e(another)g(DTD)h(e)o(xpects)g
+Fq(p)g Fv(tags)h(for)e(the)i(same)f(purpose.)e(As)j(a)g(result,)f(for)g
+(e)n(v)o(ery)e(DTD)j(a)f(ne)n(w)396 2206 y(processor)f(is)i(required.)
+396 2356 y(Although)e(XML)h(can)g(be)g(used)g(to)g(e)o(xpress)g
+(structured)f(te)o(xt)h(documents)e(it)j(is)g(not)f(limited)g(to)g
+(this)h(kind)e(of)396 2464 y(application.)g(F)o(or)h(e)o(xample,)e(XML)
+i(can)g(also)h(be)f(used)g(to)g(e)o(xchange)e(structured)h(data)h(o)o
+(v)o(er)f(a)h(netw)o(ork,)f(or)h(to)396 2572 y(simply)g(store)g
+(structured)f(data)h(in)g(\002les.)h(Note)f(that)h(XML)f(documents)e
+(cannot)i(contain)f(arbitrary)f(binary)h(data)396 2680
+y(because)g(some)g(characters)g(are)g(forbidden;)e(for)i(some)g
+(applications)g(you)f(need)h(to)h(encode)e(binary)g(data)h(as)h(te)o
+(xt)g(\(e.g.)396 2788 y(the)g(base)h(64)f(encoding\).)-2
+3116 y Fp(1.1.1.)35 b(The)f("hello)g(w)n(orld")e(e)n(xample)396
+3283 y Fv(The)20 b(follo)n(wing)f(e)o(xample)f(sho)n(ws)j(a)f(v)o(ery)f
+(simple)i(DTD,)f(and)f(a)i(corresponding)c(document)h(instance.)h(The)
+396 3391 y(document)f(is)k(structured)c(such)i(that)h(it)f(consists)h
+(of)f(sections,)g(and)g(that)g(sections)g(consist)h(of)f(paragraphs,)d
+(and)j(that)396 3499 y(paragraphs)e(contain)h(plain)h(te)o(xt:)396
+3679 y Fq()396
+3777 y()396 3874
+y()396 4065 y Fv(The)20
+b(follo)n(wing)f(document)f(is)j(an)f(instance)g(of)g(this)h(DTD:)396
+4245 y Fq()396
+4342 y()396
+4439 y()486 4536 y()576 4633 y(This)e(is)
+i(a)h(paragraph)e(of)i(the)f(first)g(section. )576
+4731 y(This)e(is)i(another)g(paragraph)f(of)i(the)f(first)g
+(section. )486 4828 y( )p Black 3839
+5278 a Fr(7)p Black eop
+%%Page: 8 8
+8 7 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 486 579 a Fq()576 676 y(This)42
+b(is)i(the)h(only)f(paragraph)f(of)i(the)f(second)g
+(section. )486 773 y( )396 870 y( )396
+1061 y Fv(As)21 b(in)g(HTML)f(\(and,)f(of)h(course,)f(in)h(grand-f)o
+(ather)d(SGML\),)j(the)g("pieces")g(of)g(the)g(document)f(are)h
+(delimited)f(by)396 1169 y(element)h(braces,)f(i.e.)i(such)f(a)g(piece)
+g(be)o(gins)f(with)i Fo(<)p Fq(name-of-the-type-of-the-piece)p
+Fo(>)15 b Fv(and)20 b(ends)g(with)396 1277 y Fo(<)p Fq
+(/name-of-the-type-of-the-piece)p Fo(>)p Fv(,)15 b(and)20
+b(the)g(pieces)g(are)g(called)g Fr(elements)p Fv(.)g(Unlik)o(e)g(HTML)g
+(and)396 1385 y(SGML,)g(both)g(start)g(tags)h(and)f(end)f(tags)i
+(\(i.e.)f(the)g(delimiters)g(written)g(in)g(angle)g(brack)o(ets\))f
+(can)h(ne)n(v)o(er)f(be)h(left)g(out.)396 1493 y(F)o(or)g(e)o(xample,)f
+(HTML)h(calls)h(the)f(paragraphs)e(simply)i Fq(p)p Fv(,)g(and)f
+(because)h(paragraphs)e(ne)n(v)o(er)h(contain)g(paragraphs,)f(a)396
+1601 y(sequence)h(of)h(se)n(v)o(eral)g(paragraphs)e(can)i(be)g(written)
+g(as:)396 1781 y Fq(First)44 b(paragraph)396 1878
+y(
Second)g(paragraph)396 2069 y Fv(This)21 b(is)g(not)f(possible)g
+(in)g(XML;)g(continuing)e(our)i(e)o(xample)e(abo)o(v)o(e)h(we)h(must)h
+(al)o(w)o(ays)f(write)396 2249 y Fq(First)42
+b(paragraph )396 2346 y(Second)g
+(paragraph )396 2537 y Fv(The)20 b(rationale)f(behind)g
+(that)h(is)i(to)e(\(1\))f(simplify)h(the)g(de)n(v)o(elopment)d(of)j
+(XML)h(parsers)f(\(you)e(need)i(not)g(con)m(v)o(ert)e(the)396
+2645 y(DTD)j(into)f(a)g(deterministic)f(\002nite)i(automaton)d(which)i
+(is)h(required)d(to)j(detect)f(omitted)f(tags\),)h(and)g(to)g(\(2\))g
+(mak)o(e)f(it)396 2753 y(possible)h(to)h(parse)e(the)i(document)d
+(independent)f(of)j(whether)f(the)i(DTD)f(is)h(kno)n(wn)e(or)h(not.)396
+2903 y(The)g(\002rst)h(line)f(of)g(our)g(sample)g(document,)396
+3083 y Fq()396
+3274 y Fv(is)21 b(the)e(so-called)g Fr(XML)h(declar)o(ation)p
+Fv(.)d(It)j(e)o(xpresses)e(that)i(the)f(document)f(follo)n(ws)h(the)g
+(con)m(v)o(entions)e(of)i(XML)g(v)o(ersion)396 3382 y(1.0,)h(and)f
+(that)h(the)h(document)d(is)j(encoded)d(using)i(characters)f(from)g
+(the)i(ISO-8859-1)c(character)i(set)i(\(often)e(kno)n(wn)396
+3490 y(as)i("Latin)e(1",)g(mostly)h(used)f(in)h(W)-7
+b(estern)20 b(Europe\).)d(Although)h(the)i(XML)g(declaration)e(is)i
+(not)g(mandatory)-5 b(,)16 b(it)21 b(is)f(good)396 3598
+y(style)h(to)f(include)f(it;)i(e)n(v)o(erybody)c(sees)k(at)g(the)f
+(\002rst)h(glance)f(that)g(the)g(document)e(uses)j(XML)f(markup)f(and)g
+(not)h(the)396 3706 y(similar)n(-looking)e(HTML)i(and)g(SGML)g(markup)f
+(languages.)f(If)i(you)g(omit)g(the)g(XML)g(declaration,)e(the)j
+(parser)e(will)396 3813 y(assume)h(that)h(the)f(document)e(is)j
+(encoded)e(as)i(UTF-8)e(or)h(UTF-16)f(\(there)h(is)h(a)g(rule)e(that)i
+(mak)o(es)f(it)h(possible)f(to)396 3921 y(distinguish)f(between)h
+(UTF-8)g(and)f(UTF-16)g(automatically\);)g(these)h(are)g(encodings)f
+(of)h(Unicode')-5 b(s)19 b(uni)n(v)o(ersal)396 4029 y(character)g(set.)
+i(\(Note)f(that)g(PXP,)h(unlik)o(e)e(its)i(predecessor)e("Markup",)f
+(fully)i(supports)f(Unicode.\))396 4179 y(The)h(second)f(line,)396
+4359 y Fq()396
+4550 y Fv(names)20 b(the)g(DTD)h(that)f(is)h(going)e(to)h(be)g(used)g
+(for)g(the)g(rest)h(of)f(the)g(document.)e(In)i(general,)f(it)i(is)g
+(possible)f(that)g(the)396 4658 y(DTD)h(consists)f(of)g(tw)o(o)h
+(parts,)f(the)g(so-called)f(e)o(xternal)g(and)h(the)g(internal)f
+(subset.)h("External")f(means)h(that)g(the)h(DTD)396
+4766 y(e)o(xists)g(as)g(a)f(second)g(\002le;)h("internal")e(means)h
+(that)g(the)g(DTD)h(is)g(included)d(in)j(the)f(same)g(\002le.)h(In)f
+(this)g(e)o(xample,)f(there)p Black 3842 5278 a Fr(8)p
+Black eop
+%%Page: 9 9
+9 8 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(is)g(only)f(an)g(e)o(xternal)f(subset,)h(and)g(the)g
+(system)g(identi\002er)g("simple.dtd")e(speci\002es)j(where)f(the)g
+(DTD)g(\002le)h(can)f(be)396 687 y(found.)e(System)j(identi\002ers)f
+(are)g(interpreted)e(as)j(URLs;)g(for)f(instance)g(this)g(w)o(ould)g
+(be)g(le)o(gal:)396 867 y Fq()396 1058 y Fv(Please)21
+b(note)f(that)g(PXP)h(cannot)e(interpret)g(HTTP)i(identi\002ers)e(by)h
+(def)o(ault,)f(b)n(ut)i(it)g(is)g(possible)f(to)g(change)f(the)396
+1166 y(interpretation)f(of)i(system)h(identi\002ers.)396
+1315 y(The)f(w)o(ord)g(immediately)f(follo)n(wing)f Fq(DOCTYPE)i
+Fv(determines)f(which)g(of)h(the)g(declared)f(element)h(types)g(\(here)
+396 1423 y("document",)e("section",)h(and)h("paragraph"\))d(is)k(used)f
+(for)g(the)g(outermost)f(element,)g(the)h Fr(r)l(oot)h(element)q
+Fv(.)f(In)g(this)396 1531 y(e)o(xample)f(it)i(is)g Fq(document)f
+Fv(because)f(the)h(outermost)f(element)h(is)h(delimited)e(by)h
+Fo(<)p Fq(document)p Fo(>)f Fv(and)396 1639 y Fo(<)p
+Fq(/document)p Fo(>)p Fv(.)396 1789 y(The)h(DTD)g(consists)h(of)f
+(three)g(declarations)f(for)g(element)h(types:)g Fq(document)p
+Fv(,)f Fq(section)p Fv(,)g(and)h Fq(paragraph)p Fv(.)f(Such)396
+1896 y(a)i(declaration)d(has)j(tw)o(o)f(parts:)396 2077
+y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i(content-model)p Fo(>)396
+2268 y Fv(The)20 b(content)f(model)h(is)h(a)f(re)o(gular)f(e)o
+(xpression)g(which)g(describes)h(the)g(possible)g(inner)f(structure)h
+(of)g(the)g(element.)396 2376 y(Here,)g Fq(document)f
+Fv(contains)h(one)g(or)g(more)f(sections,)h(and)g(a)g
+Fq(section)g Fv(contains)f(one)h(or)g(more)f(paragraphs.)f(Note)396
+2483 y(that)j(these)f(tw)o(o)g(element)g(types)g(are)g(not)g(allo)n
+(wed)f(to)i(contain)e(arbitrary)g(te)o(xt.)g(Only)h(the)g
+Fq(paragraph)g Fv(element)f(type)396 2591 y(is)i(declared)e(such)h
+(that)h(parsed)e(character)g(data)h(\(indicated)f(by)h(the)g(symbol)f
+Fq(#PCDATA)p Fv(\))g(is)i(permitted.)396 2741 y(See)g(belo)n(w)e(for)h
+(a)h(detailed)e(discussion)h(of)g(content)f(models.)-2
+3110 y Fp(1.1.2.)35 b(XML)e(par)n(ser)n(s)h(and)g(pr)n(ocessor)n(s)396
+3278 y Fv(XML)20 b(documents)f(are)h(human-readable,)c(b)n(ut)21
+b(this)f(is)h(not)f(the)h(main)e(purpose)g(of)h(this)h(language.)d(XML)
+i(has)g(been)396 3386 y(designed)f(such)h(that)g(documents)f(can)h(be)g
+(read)g(by)f(a)i(program)d(called)i(an)g Fr(XML)h(par)o(ser)r
+Fv(.)f(The)g(parser)g(checks)f(that)396 3494 y(the)h(document)f(is)i
+(well-formatted,)d(and)h(it)i(represents)f(the)g(document)e(as)j
+(objects)f(of)g(the)g(programming)d(language.)396 3602
+y(There)j(are)g(tw)o(o)g(aspects)h(when)e(checking)g(the)h(document:)e
+(First,)j(the)f(document)e(must)j(follo)n(w)e(some)h(basic)396
+3710 y(syntactic)g(rules,)g(such)g(as)h(that)f(tags)h(are)f(written)g
+(in)g(angle)g(brack)o(ets,)f(that)h(for)g(e)n(v)o(ery)f(start)h(tag)h
+(there)e(must)i(be)f(a)396 3818 y(corresponding)d(end)j(tag)g(and)f(so)
+i(on.)f(A)g(document)e(respecting)h(these)i(rules)f(is)h
+Fr(well-formed)r Fv(.)f(Second,)f(the)396 3926 y(document)f(must)j
+(match)e(the)i(DTD)f(in)g(which)g(case)h(the)f(document)e(is)j
+Fr(valid)r Fv(.)f(Man)o(y)f(parsers)h(check)f(only)h(on)396
+4034 y(well-formedness)e(and)i(ignore)f(the)h(DTD;)h(PXP)g(is)g
+(designed)e(such)g(that)i(it)g(can)f(e)n(v)o(en)f(v)n(alidate)g(the)i
+(document.)396 4183 y(A)g(parser)f(does)f(not)h(mak)o(e)g(a)h(sensible)
+f(application,)e(it)j(only)f(reads)g(XML)g(documents.)e(The)i(whole)g
+(application)396 4291 y(w)o(orking)f(with)h(XML-formatted)e(data)i(is)h
+(called)f(an)g Fr(XML)h(pr)l(ocessor)r Fv(.)f(Often)g(XML)g(processors)
+f(con)m(v)o(ert)396 4399 y(documents)g(into)h(another)e(format,)h(such)
+h(as)h(HTML)f(or)g(Postscript.)g(Sometimes)g(processors)f(e)o(xtract)g
+(data)h(of)g(the)396 4507 y(documents)f(and)g(output)g(the)i(processed)
+e(data)h(again)f(XML-formatted.)e(The)j(parser)g(can)g(help)f(the)i
+(application)396 4615 y(processing)e(the)h(document;)f(for)g(e)o
+(xample)g(it)i(can)f(pro)o(vide)e(means)i(to)g(access)h(the)f(document)
+e(in)j(a)f(speci\002c)h(manner)-5 b(.)396 4723 y(PXP)21
+b(supports)e(an)i(object-oriented)c(access)k(layer)e(specially)-5
+b(.)p Black 3842 5278 a Fr(9)p Black eop
+%%Page: 10 10
+10 9 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black -2 583 a Fp(1.1.3.)35 b(Discussion)396 751 y Fv(As)21
+b(we)g(ha)n(v)o(e)e(seen,)h(there)g(are)g(tw)o(o)h(le)n(v)o(els)f(of)g
+(description:)f(On)h(the)g(one)g(hand,)f(XML)h(can)g(de\002ne)f(rules)i
+(about)e(the)396 859 y(format)g(of)h(a)h(document)d(\(the)i(DTD\),)g
+(on)f(the)i(other)e(hand,)g(XML)h(e)o(xpresses)g(structured)f
+(documents.)f(There)h(are)h(a)396 967 y(number)f(of)h(possible)f
+(applications:)p Black 396 1199 a Ft(\225)p Black 60
+w Fv(XML)i(can)f(be)g(used)g(to)g(e)o(xpress)f(structured)g(te)o(xts.)h
+(Unlik)o(e)g(HTML,)g(there)g(is)h(no)e(canonical)g(interpretation;)g
+(one)479 1307 y(w)o(ould)h(ha)n(v)o(e)f(to)i(write)f(a)h(back)o(end)d
+(for)i(the)g(DTD)g(that)h(translates)f(the)g(structured)f(te)o(xts)h
+(into)g(a)h(format)e(that)479 1415 y(e)o(xisting)h(bro)n(wsers,)f
+(printers)g(etc.)i(understand.)c(The)j(adv)n(antage)e(of)i(a)h
+(self-de\002ned)e(document)f(format)h(is)i(that)f(it)479
+1523 y(is)h(possible)f(to)h(design)e(the)h(format)f(in)i(a)f(more)g
+(problem-oriented)c(w)o(ay)-5 b(.)20 b(F)o(or)f(e)o(xample,)g(if)h(the)
+h(task)f(is)h(to)g(e)o(xtract)479 1631 y(reports)f(from)f(a)h
+(database,)g(one)f(can)h(use)h(a)f(DTD)h(that)f(re\003ects)h(the)f
+(structure)f(of)h(the)g(report)f(or)h(the)g(database.)g(A)479
+1739 y(possible)g(approach)e(w)o(ould)i(be)g(to)g(ha)n(v)o(e)g(an)g
+(element)f(type)h(for)g(e)n(v)o(ery)f(database)g(table)h(and)g(for)g(e)
+n(v)o(ery)e(column.)479 1847 y(Once)i(the)g(DTD)h(has)f(been)g
+(designed,)e(the)j(report)e(procedure)e(can)j(be)g(splitted)h(up)e(in)i
+(a)f(part)g(that)h(selects)g(the)479 1955 y(database)f(ro)n(ws)g(and)g
+(outputs)f(them)h(as)h(an)f(XML)g(document)e(according)g(to)j(the)f
+(DTD,)g(and)g(in)g(a)g(part)g(that)479 2063 y(translates)h(the)f
+(document)e(into)i(other)f(formats.)g(Of)i(course,)e(the)h(latter)h
+(part)e(can)h(be)h(solv)o(ed)e(in)h(a)h(generic)e(w)o(ay)-5
+b(,)479 2170 y(e.g.)20 b(there)g(may)f(be)h(con\002gurable)e(back)o
+(ends)h(for)h(all)g(DTDs)h(that)f(follo)n(w)g(the)g(approach)e(and)i
+(ha)n(v)o(e)f(element)h(types)479 2278 y(for)g(tables)g(and)g(columns.)
+479 2428 y(XML)h(plays)f(the)g(role)g(of)g(a)g(con\002gurable)e
+(intermediate)h(format.)g(The)g(database)h(e)o(xtraction)e(function)h
+(can)h(be)479 2536 y(written)g(without)g(ha)n(ving)f(to)h(kno)n(w)f
+(the)h(details)h(of)f(typesetting;)f(the)h(back)o(ends)f(can)h(be)g
+(written)g(without)g(ha)n(ving)479 2644 y(to)h(kno)n(w)e(the)h(details)
+h(of)e(the)i(database.)479 2793 y(Of)g(course,)e(there)h(are)g
+(traditional)f(solutions.)g(One)h(can)g(de\002ne)g(an)g(ad)g(hoc)g
+(intermediate)e(te)o(xt)j(\002le)f(format.)f(This)479
+2901 y(disadv)n(antage)f(is)k(that)e(there)g(are)g(no)f(names)h(for)g
+(the)g(pieces)g(of)g(the)g(format,)f(and)h(that)g(such)g(formats)g
+(usually)f(lack)479 3009 y(of)h(documentation)d(because)j(of)g(this.)g
+(Another)f(solution)g(w)o(ould)h(be)g(to)g(ha)n(v)o(e)g(a)h(binary)e
+(representation,)e(either)j(as)479 3117 y(language-dependent)c(or)k
+(language-independent)14 b(structure)20 b(\(e)o(xample)e(of)i(the)g
+(latter)h(can)f(be)g(found)e(in)j(RPC)479 3225 y(implementations\).)d
+(The)i(disadv)n(antage)e(is)j(that)f(it)h(is)g(harder)e(to)i(vie)n(w)f
+(such)g(representations,)e(one)h(has)i(to)f(write)479
+3333 y(pretty)g(printers)f(for)h(this)g(purpose.)f(It)h(is)h(also)g
+(more)e(dif)n(\002cult)h(to)g(enter)g(test)h(data;)f(XML)g(is)h(plain)f
+(te)o(xt)g(that)h(can)f(be)479 3441 y(written)g(using)g(an)g(arbitrary)
+f(editor)g(\(Emacs)h(has)g(e)n(v)o(en)f(a)i(good)e(XML)h(mode,)f
+(PSGML\).)h(All)h(these)f(alternati)n(v)o(es)479 3549
+y(suf)n(fer)g(from)f(a)h(missing)g(structure)g(check)o(er)m(,)e(i.e.)i
+(the)h(programs)d(processing)h(these)h(formats)f(usually)h(do)g(not)479
+3657 y(check)g(the)g(input)f(\002le)i(or)f(input)g(object)f(in)i
+(detail;)f(XML)g(parsers)g(check)f(the)h(syntax)g(of)g(the)g(input)g
+(\(the)f(so-called)479 3765 y(well-formedness)f(check\),)h(and)h(the)g
+(adv)n(anced)e(parsers)i(lik)o(e)g(PXP)h(e)n(v)o(en)f(v)o(erify)e(that)
+j(the)f(structure)f(matches)h(the)479 3872 y(DTD)h(\(the)f(so-called)f
+(v)n(alidation\).)p Black 396 4022 a Ft(\225)p Black
+60 w Fv(XML)i(can)f(be)g(used)g(as)g(con\002gurable)e(communication)g
+(language.)g(A)i(fundamental)e(problem)h(of)h(e)n(v)o(ery)479
+4130 y(communication)e(is)j(that)f(sender)f(and)h(recei)n(v)o(er)f
+(must)h(follo)n(w)g(the)g(same)g(con)m(v)o(entions)e(about)h(the)h
+(language.)e(F)o(or)479 4238 y(data)i(e)o(xchange,)e(the)i(question)f
+(is)j(usually)d(which)h(data)g(records)f(and)h(\002elds)g(are)g(a)n(v)n
+(ailable,)g(ho)n(w)g(the)o(y)f(are)479 4346 y(syntactically)h
+(composed,)e(and)i(which)f(v)n(alues)h(are)g(possible)g(for)g(the)g(v)n
+(arious)f(\002elds.)h(Similar)h(questions)e(arise)479
+4454 y(for)h(te)o(xt)g(document)e(e)o(xchange.)g(XML)i(does)g(not)g
+(answer)g(these)g(problems)f(completely)-5 b(,)18 b(b)n(ut)i(it)h
+(reduces)e(the)479 4562 y(number)g(of)h(ambiguities)f(for)g(such)h(con)
+m(v)o(entions:)e(The)i(outlines)f(of)h(the)g(syntax)g(are)g
+(speci\002ed)g(by)g(the)g(DTD)g(\(b)n(ut)479 4669 y(not)g(necessarily)g
+(the)g(details\),)g(and)g(XML)g(introduces)e(canonical)h(names)h(for)g
+(the)g(components)e(of)i(documents)479 4777 y(such)g(that)h(it)f(is)i
+(simpler)d(to)i(describe)e(the)h(rest)h(of)f(the)g(syntax)g(and)f(the)h
+(semantics)h(informally)-5 b(.)p Black 3800 5278 a Fr(10)p
+Black eop
+%%Page: 11 11
+11 10 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black Black 396 579 a Ft(\225)p Black 60 w Fv(XML)f(is)g(a)g(data)f
+(storage)g(format.)f(Currently)-5 b(,)17 b(e)n(v)o(ery)h(softw)o(are)h
+(product)f(tends)h(to)h(use)f(its)i(o)n(wn)d(w)o(ay)i(to)f(store)h
+(data;)479 687 y(commercial)f(softw)o(are)h(often)f(does)h(not)g
+(describe)f(such)h(formats,)f(and)h(it)h(is)g(a)g(pain)e(to)i(inte)o
+(grate)e(such)h(softw)o(are)479 795 y(into)g(a)g(bigger)f(project.)f
+(XML)i(can)g(help)f(to)h(impro)o(v)o(e)e(this)j(situation)e(when)g(se)n
+(v)o(eral)g(applications)g(share)h(the)g(same)479 903
+y(syntax)g(of)g(data)g(\002les.)h(DTDs)f(are)g(then)g(neutral)g
+(instances)g(that)g(check)f(the)h(format)g(of)f(data)i(\002les)g
+(independent)c(of)479 1011 y(applications.)-2 1512 y
+Fx(1.2.)39 b(Highlights)e(of)i(XML)396 1692 y Fv(This)21
+b(section)f(e)o(xplains)f(man)o(y)g(of)h(the)g(features)f(of)h(XML,)g
+(b)n(ut)h(not)e(all,)i(and)f(some)g(features)f(not)h(in)g(detail.)g(F)o
+(or)g(a)396 1800 y(complete)f(description,)g(see)i(the)f(XML)g
+(speci\002cation)396 1908 y(\(http://www)-5 b(.w3.or)o
+(g/TR/1998/REC-xml-)o(19)o(98)o(02)o(10)o(.htm)o(l\).)-2
+2236 y Fp(1.2.1.)35 b(The)f(DTD)g(and)g(the)f(instance)396
+2404 y Fv(The)20 b(DTD)g(contains)g(v)n(arious)f(declarations;)g(in)h
+(general)f(you)h(can)g(only)f(use)i(a)f(feature)f(if)i(you)e(ha)n(v)o
+(e)h(pre)n(viously)396 2512 y(declared)f(it.)i(The)f(document)e
+(instance)i(\002le)h(may)e(contain)g(the)i(full)f(DTD,)g(b)n(ut)g(it)h
+(is)g(also)g(possible)f(to)g(split)h(the)f(DTD)396 2619
+y(into)g(an)g(internal)g(and)f(an)h(e)o(xternal)f(subset.)h(A)h
+(document)d(must)j(be)o(gin)e(as)h(follo)n(ws)g(if)h(the)f(full)g(DTD)g
+(is)h(included:)396 2800 y Fo(<)p Fq(?xml)44 b(version="1.0")f
+(encoding=")p Fn(Your)f(encoding)t Fq("?)p Fo(>)396 2897
+y(<)p Fq(!DOCTYPE)h Fn(root)i Fq([)486 2994 y Fn(Declarations)396
+3091 y Fq(])p Fo(>)396 3282 y Fv(These)20 b(declarations)f(are)h
+(called)g(the)h Fr(internal)e(subset)q Fv(.)i(Note)f(that)g(the)g
+(usage)g(of)g(entities)h(and)e(conditional)g(sections)396
+3390 y(is)i(restricted)f(within)g(the)g(internal)g(subset.)396
+3539 y(If)g(the)h(declarations)d(are)j(located)e(in)h(a)h(dif)n(ferent)
+e(\002le,)h(you)f(can)h(refer)g(to)g(this)h(\002le)g(as)g(follo)n(ws:)
+396 3720 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p
+Fn(Your)f(encoding)t Fq("?)p Fo(>)396 3817 y(<)p Fq(!DOCTYPE)h
+Fn(root)i Fq(SYSTEM)e(")p Fn(file)h(name)p Fq(")p Fo(>)396
+4008 y Fv(The)20 b(declarations)f(in)h(the)h(\002le)f(are)h(called)f
+(the)g Fr(e)n(xternal)g(subset)q Fv(.)g(The)g(\002le)h(name)f(is)h
+(called)f(the)g Fr(system)h(identi\002er)r Fv(.)e(It)396
+4116 y(is)i(also)g(possible)f(to)g(refer)g(to)g(the)g(\002le)h(by)f(a)g
+(so-called)g Fr(public)f(identi\002er)r Fv(,)g(b)n(ut)i(most)f(XML)g
+(applications)f(w)o(on')o(t)g(use)396 4223 y(this)i(feature.)396
+4373 y(Y)-9 b(ou)20 b(can)g(also)g(specify)g(both)f(internal)h(and)f(e)
+o(xternal)g(subsets.)i(In)e(this)i(case,)g(the)f(declarations)f(of)h
+(both)f(subsets)i(are)396 4481 y(mix)o(ed,)e(and)h(if)g(there)g(are)g
+(con\003icts,)g(the)g(declaration)f(of)h(the)g(internal)f(subset)i(o)o
+(v)o(errides)d(those)i(of)g(the)g(e)o(xternal)396 4589
+y(subset)h(with)f(the)g(same)h(name.)e(This)h(looks)g(as)h(follo)n(ws:)
+396 4769 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p
+Fn(Your)f(encoding)t Fq("?)p Fo(>)396 4866 y(<)p Fq(!DOCTYPE)h
+Fn(root)89 b Fq(SYSTEM)44 b(")p Fn(file)g(name)p Fq(")g([)p
+Black 3800 5278 a Fr(11)p Black eop
+%%Page: 12 12
+12 11 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 486 579 a Fn(Declarations)396 676 y Fq(])p Fo(>)396
+909 y Fv(The)f(XML)g(declaration)f(\(the)h(string)g(be)o(ginning)d
+(with)k Fo(<)p Fq(?xml)e Fv(and)h(ending)f(at)i Fq(?)p
+Fo(>)p Fv(\))f(should)f(specify)g(the)h(encoding)396
+1016 y(of)g(the)g(\002le.)h(Common)e(v)n(alues)h(are)g(UTF-8,)f(and)h
+(the)g(ISO-8859)e(series)j(of)f(character)f(sets.)i(Note)f(that)g(e)n
+(v)o(ery)f(\002le)396 1124 y(parsed)h(by)f(the)i(XML)f(processor)f(can)
+h(be)o(gin)f(with)h(an)g(XML)h(declaration)d(and)i(that)g(e)n(v)o(ery)f
+(\002le)i(may)e(ha)n(v)o(e)h(its)h(o)n(wn)396 1232 y(encoding.)396
+1382 y(The)f(name)g(of)g(the)g(root)f(element)h(must)g(be)g(mentioned)f
+(directly)g(after)h(the)g Fq(DOCTYPE)g Fv(string.)f(This)i(means)e
+(that)i(a)396 1490 y(full)f(document)f(instance)g(looks)h(lik)o(e)396
+1670 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p
+Fn(Your)f(encoding)t Fq("?)p Fo(>)396 1767 y(<)p Fq(!DOCTYPE)h
+Fn(root)89 b Fq(SYSTEM)44 b(")p Fn(file)g(name)p Fq(")g([)486
+1864 y Fn(Declarations)396 1961 y Fq(])p Fo(>)396 2156
+y(<)p Fn(root)p Fo(>)486 2253 y Fn(inner)g(contents)396
+2350 y Fo(<)p Fq(/)p Fn(root)p Fo(>)-2 2802 y Fp(1.2.2.)35
+b(Reser)q(ved)h(c)o(haracter)n(s)396 2970 y Fv(Some)20
+b(characters)f(are)i(generally)d(reserv)o(ed)h(to)h(indicate)g(markup)e
+(such)i(that)g(the)o(y)g(cannot)f(be)h(used)g(for)g(character)396
+3078 y(data.)g(These)g(characters)f(are)h Fm(<)p Fv(,)h
+Fm(>)p Fv(,)f(and)f(&.)h(Furthermore,)e(single)i(and)g(double)e(quotes)
+i(are)g(sometimes)g(reserv)o(ed.)396 3186 y(If)g(you)g(w)o(ant)g(to)g
+(include)f(such)h(a)h(character)e(as)i(character)m(,)d(write)j(it)f(as)
+h(follo)n(ws:)p Black 396 3473 a Ft(\225)p Black 60 w
+Fq(<)f Fv(instead)g(of)g Fm(<)p Black 396 3581 a Ft(\225)p
+Black 60 w Fq(>)g Fv(instead)g(of)g Fm(>)p Black 396
+3689 a Ft(\225)p Black 60 w Fq(&)g Fv(instead)g(of)g(&)p
+Black 396 3797 a Ft(\225)p Black 60 w Fq(')g Fv(instead)g(of)g(')p
+Black 396 3905 a Ft(\225)p Black 60 w Fq(")g Fv(instead)g(of)g(")
+396 4054 y(All)h(other)e(characters)h(are)g(free)g(in)g(the)g(document)
+e(instance.)i(It)g(is)i(possible)d(to)i(include)e(a)i(character)e(by)g
+(its)j(position)396 4162 y(in)f(the)f(Unicode)f(alphabet:)396
+4342 y Fq()p Fn(n)p Fq(;)396 4533 y Fv(where)h Fl(n)g
+Fv(is)i(the)e(decimal)f(number)g(of)h(the)g(character)-5
+b(.)19 b(Alternati)n(v)o(ely)-5 b(,)18 b(you)h(can)h(specify)g(the)g
+(character)f(by)h(its)396 4641 y(he)o(xadecimal)e(number:)396
+4822 y Fq()p Fn(n)p Fq(;)p Black 3800 5278 a Fr(12)p
+Black eop
+%%Page: 13 13
+13 12 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(In)f(the)g(scope)g(of)g(declarations,)f(the)h
+(character)f(\045)i(is)g(no)f(longer)f(free.)g(T)-7 b(o)20
+b(include)g(it)h(as)f(character)m(,)f(you)g(must)h(use)396
+687 y(the)g(notations)g Fq(%)g Fv(or)f Fq(%)p
+Fv(.)396 836 y(Note)h(that)h(besides)f(<,)g(>,)g(&,)f
+(',)g(and)h(")f(there)h(are)g(no)g(prede\002nes)f(character)g
+(entities.)h(This)396 944 y(is)h(dif)n(ferent)e(from)g(HTML)h(which)g
+(de\002nes)g(a)g(list)i(of)d(characters)h(that)g(can)g(be)g(referenced)
+e(by)i(name)f(\(e.g.)h(ä)396 1052 y(for)g(\344\);)g(ho)n(we)n(v)o
+(er)m(,)e(if)i(you)g(prefer)e(named)i(characters,)f(you)g(can)h
+(declare)f(such)h(entities)h(yourself)e(\(see)h(belo)n(w\).)-2
+1422 y Fp(1.2.3.)35 b(Elements)g(and)f(ELEMENT)e(dec)n(larations)396
+1589 y Fv(Elements)20 b(structure)f(the)h(document)f(instance)g(in)i(a)
+f(hierarchical)f(w)o(ay)-5 b(.)20 b(There)f(is)i(a)g(top-le)n(v)o(el)d
+(element,)i(the)g Fr(r)l(oot)396 1697 y(element)q Fv(,)g(which)g
+(contains)g(a)g(sequence)f(of)h(inner)g(elements)f(and)h(character)f
+(sections.)h(The)g(inner)f(elements)h(are)396 1805 y(structured)f(in)h
+(the)f(same)h(w)o(ay)-5 b(.)20 b(Ev)o(ery)e(element)h(has)h(an)g
+Fr(element)f(type)p Fv(.)h(The)f(be)o(ginning)f(of)h(the)h(element)f
+(is)i(indicated)396 1913 y(by)f(a)h Fr(start)g(ta)o(g)p
+Fv(,)e(written)396 2093 y Fo(<)p Fn(element-type)p Fo(>)396
+2284 y Fv(and)h(the)g(element)g(continues)f(until)h(the)g
+(corresponding)d Fr(end)i(ta)o(g)h Fv(is)h(reached:)396
+2465 y Fo(<)p Fq(/)p Fn(element-type)p Fo(>)396 2655
+y Fv(In)f(XML,)f(it)i(is)f(not)g(allo)n(wed)f(to)h(omit)f(start)i(or)e
+(end)g(tags,)h(e)n(v)o(en)f(if)h(the)g(DTD)g(w)o(ould)f(permit)g(this.)
+h(Note)g(that)g(there)f(are)396 2763 y(no)h(special)g(rules)g(ho)n(w)g
+(to)g(interpret)g(spaces)g(or)g(ne)n(wlines)g(near)f(start)i(or)f(end)g
+(tags;)g(all)h(spaces)f(and)g(ne)n(wlines)g(count.)396
+2913 y(Ev)o(ery)f(element)h(type)f(must)i(be)f(declared)f(before)f(it)j
+(can)f(be)g(used.)g(The)g(declaration)f(consists)h(of)g(tw)o(o)h
+(parts:)f(the)396 3021 y(ELEMENT)f(declaration)f(describes)h(the)h
+(content)f(model,)f(i.e.)i(which)f(inner)g(elements)g(are)h(allo)n
+(wed;)f(the)h(A)-9 b(TTLIST)396 3129 y(declaration)19
+b(describes)h(the)g(attrib)n(utes)g(of)g(the)g(element.)396
+3278 y(An)g(element)g(can)g(simply)g(allo)n(w)g(e)n(v)o(erything)e(as)i
+(content.)f(This)i(is)g(written:)396 3458 y Fo(<)p Fq(!ELEMENT)43
+b Fn(name)i Fq(ANY)p Fo(>)396 3649 y Fv(On)20 b(the)h(opposite,)e(an)h
+(element)f(can)h(be)g(forced)f(to)i(be)f(empty;)f(declared)g(by:)396
+3829 y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i Fq(EMPTY)p Fo(>)396
+4020 y Fv(Note)20 b(that)h(there)e(is)j(an)e(abbre)n(viated)e(notation)
+h(for)g(empty)g(element)h(instances:)g Fo(<)p Fn(name)p
+Fq(/)p Fo(>)p Fv(.)396 4170 y(There)g(are)g(tw)o(o)g(more)g
+(sophisticated)f(forms)g(of)h(declarations:)f(so-called)h
+Fr(mixed)g(declar)o(ations)p Fv(,)e(and)i Fr(r)m(e)m(gular)396
+4278 y(e)n(xpr)m(essions)p Fv(.)g(An)h(element)e(with)i(mix)o(ed)e
+(content)g(contains)g(character)g(data)h(interspersed)f(with)i(inner)e
+(elements,)396 4386 y(and)h(the)g(set)h(of)f(allo)n(wed)g(inner)f
+(elements)h(can)g(be)g(speci\002ed.)g(In)f(contrast)h(to)g(this,)h(a)g
+(re)o(gular)d(e)o(xpression)396 4494 y(declaration)h(does)h(not)g(allo)
+n(w)g(character)f(data,)h(b)n(ut)g(the)g(inner)f(elements)h(can)g(be)g
+(described)f(by)h(the)g(more)g(po)n(werful)396 4601 y(means)g(of)g(re)o
+(gular)f(e)o(xpressions.)396 4751 y(A)i(declaration)e(for)g(mix)o(ed)g
+(content)g(looks)h(as)h(follo)n(ws:)p Black 3800 5278
+a Fr(13)p Black eop
+%%Page: 14 14
+14 13 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fo(<)p Fq(!ELEMENT)43 b Fn(name)i Fq(\(#PCDATA)e(|)i
+Fn(element)1892 609 y Fk(1)1962 579 y Fq(|)g(...)f(|)h
+Fn(element)2636 609 y Fk(n)2707 579 y Fq(\)*)p Fo(>)396
+770 y Fv(or)20 b(if)h(you)e(do)h(not)g(w)o(ant)g(to)g(allo)n(w)g(an)o
+(y)g(inner)f(element,)h(simply)396 950 y Fo(<)p Fq(!ELEMENT)43
+b Fn(name)i Fq(\(#PCDATA\))p Fo(>)396 1279 y Fj(Example)479
+1426 y Fi(If)19 b(element)g(type)g Fh(q)g Fi(is)g(declared)h(as)479
+1596 y Fh()479
+1776 y Fi(this)19 b(is)f(a)h(le)o(gal)g(instance:)479
+1947 y Fh(This)43 b(is)e(character)j(data with)h( inner)
+g(elements )479 2127 y Fi(But)19 b(this)g(is)f(ille)o(gal)g(because)
+i Fh(t)f Fi(has)h(not)f(been)g(enumerated)i(in)e(the)g(declaration:)479
+2297 y Fh(This)43 b(is)e(character)j(data with)h( inner)
+g(elements )396 2571 y Fv(The)20 b(other)f(form)h(uses)g(a)h(re)o
+(gular)e(e)o(xpression)f(to)j(describe)e(the)h(possible)g(contents:)396
+2752 y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i(regexp)p Fo(>)396
+2942 y Fv(The)20 b(follo)n(wing)f(well-kno)n(wn)f(re)o(ge)o(xp)g
+(operators)h(are)h(allo)n(wed:)p Black 396 3299 a Ft(\225)p
+Black 60 w Fn(element-name)p Black 396 3407 a Ft(\225)p
+Black 60 w Fq(\()p Fn(subexpr)839 3437 y Fk(1)910 3407
+y Fq(,)g Fv(...)g Fq(,)45 b Fn(subexpr)1463 3437 y Fk(n)1533
+3407 y Fq(\))p Black 396 3515 a Ft(\225)p Black 60 w
+Fq(\()p Fn(subexpr)839 3545 y Fk(1)910 3515 y Fq(|)20
+b Fv(...)g Fq(|)45 b Fn(subexpr)1463 3545 y Fk(n)1533
+3515 y Fq(\))p Black 396 3623 a Ft(\225)p Black 60 w
+Fn(subexpr)s Fq(*)p Black 396 3731 a Ft(\225)p Black
+60 w Fn(subexpr)s Fq(+)p Black 396 3839 a Ft(\225)p Black
+60 w Fn(subexpr)s Fq(?)396 3989 y Fv(The)20 b Fq(,)h
+Fv(operator)d(indicates)i(a)h(sequence)e(of)h(sub-models,)e(the)i
+Fq(|)h Fv(operator)d(describes)i(alternati)n(v)o(e)f(sub-models.)f(The)
+396 4096 y Fq(*)j Fv(indicates)f(zero)f(or)h(more)g(repetitions,)f(and)
+g Fq(+)i Fv(one)f(or)f(more)h(repetitions.)f(Finally)-5
+b(,)19 b Fq(?)i Fv(can)f(be)g(used)g(for)f(optional)396
+4204 y(sub-models.)g(As)i(atoms)f(the)g(re)o(ge)o(xp)e(can)i(contain)f
+(names)h(of)g(elements;)g(note)g(that)g(it)h(is)g(not)f(allo)n(wed)f
+(to)i(include)396 4312 y Fq(#PCDATA)p Fv(.)396 4462 y(The)f(e)o(xact)g
+(syntax)f(of)h(the)g(re)o(gular)f(e)o(xpressions)g(is)i(rather)e
+(strange.)h(This)g(can)g(be)g(e)o(xplained)f(best)h(by)g(a)g(list)i(of)
+396 4570 y(constraints:)p Black 396 4802 a Ft(\225)p
+Black 60 w Fv(The)e(outermost)f(e)o(xpression)g(must)h(not)g(be)g
+Fn(element-name)p Fv(.)p Black 3800 5278 a Fr(14)p Black
+eop
+%%Page: 15 15
+15 14 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 479 579 a(Ille)m(gal:)e Fq()p
+Fv(;)21 b(this)f(must)h(be)f(written)g(as)h Fq()p Fv(.)p Black 396 728 a Ft(\225)p Black
+60 w Fv(F)o(or)20 b(the)g(unary)f(operators)g Fn(subexpr)s
+Fq(*)p Fv(,)g Fn(subexpr)s Fq(+)p Fv(,)g(and)g Fn(subexpr)s
+Fq(?)p Fv(,)g(the)h Fn(subexpr)i Fv(must)f(not)f(be)g(again)f(an)479
+836 y(unary)g(operator)-5 b(.)479 986 y Fr(Ille)m(gal:)19
+b Fq()p Fv(;)20 b(this)h(must)f(be)g(written)g
+(as)h Fq()p Fv(.)p Black 396
+1135 a Ft(\225)p Black 60 w Fv(Between)21 b Fq(\))f Fv(and)g(one)f(of)h
+(the)h(unary)d(operatory)g Fq(*)p Fv(,)j Fq(+)p Fv(,)f(or)g
+Fq(?)p Fv(,)g(there)g(must)g(not)g(be)g(whitespace.)479
+1285 y Fr(Ille)m(gal:)f Fq()p
+Fv(;)21 b(this)f(must)h(be)f(written)g(as)h Fq()p Fv(.)p Black 396 1434 a Ft(\225)p Black
+60 w Fv(There)20 b(is)h(the)f(additional)f(constraint)g(that)h(the)h
+(right)e(parenthsis)g(must)i(be)f(contained)e(in)j(the)f(same)g(entity)
+g(as)h(the)479 1542 y(left)g(parenthesis;)e(see)i(the)f(section)g
+(about)f(parsed)h(entities)g(belo)n(w)-5 b(.)396 1733
+y(Note)20 b(that)g(there)g(is)h(another)e(restriction)g(on)h(re)o
+(gular)e(e)o(xpressions)h(which)h(must)g(be)g(deterministic.)f(This)h
+(means)g(that)396 1841 y(the)g(parser)g(must)g(be)g(able)g(to)h(see)g
+(by)e(looking)g(at)i(the)f(ne)o(xt)f(tok)o(en)h(which)f(alternati)n(v)o
+(e)g(is)i(actually)f(used,)g(or)f(whether)396 1949 y(the)h(repetition)f
+(stops.)i(The)f(reason)f(for)g(this)i(is)g(simply)f(compatability)f
+(with)h(SGML)g(\(there)g(is)h(no)f(intrinsic)f(reason)396
+2057 y(for)h(this)h(rule;)e(XML)i(can)f(li)n(v)o(e)g(without)f(this)i
+(restriction\).)396 2302 y Fj(Example)479 2449 y Fi(The)e(elements)g
+(are)g(declared)h(as)f(follo)n(ws:)479 2620 y Fh()479 2707 y()479 2795 y()479 2882
+y()479 3062 y Fi(This)19
+b(is)f(a)h(le)o(gal)g(instance:)479 3233 y Fh(Some)44
+b(characters <)q(/q>)479 3413 y Fi(\(Note:)19
+b Fg(<)p Fh(s/)p Fg(>)g Fi(is)g(an)g(abbre)n(viation)h(for)f
+Fg(<)p Fh(s)p Fg(><)p Fh(/s)p Fg(>)p Fi(.\))g(It)f(w)o(ould)i(be)f
+(ille)o(gal)f(to)h(lea)o(v)o(e)g Fh( )h Fi(out)f(because)h(at)f
+(least)f(one)479 3510 y(instance)i(of)f Fh(s)g Fi(or)g
+Fh(t)g Fi(must)g(be)g(present.)g(It)f(w)o(ould)i(be)f(ille)o(gal,)f
+(too,)h(if)f(characters)i(e)o(xisted)f(outside)h(the)e
+Fh(r)i Fi(element;)f(the)g(only)479 3607 y(e)o(xception)h(is)f(white)g
+(space.)g(\226)g(This)f(is)h(le)o(gal,)f(too:)479 3778
+y Fh( <)q(/q>)q( )q(q>)-2 4230
+y Fp(1.2.4.)35 b(Attrib)n(ute)e(lists)h(and)g(A)-11 b(TTLIST)34
+b(dec)n(larations)396 4398 y Fv(Elements)20 b(may)g(ha)n(v)o(e)f
+(attrib)n(utes.)h(These)g(are)g(put)g(into)g(the)g(start)h(tag)f(of)g
+(an)g(element)g(as)h(follo)n(ws:)396 4578 y Fo(<)p Fn(element-name)43
+b(attribute)1444 4608 y Fk(1)1469 4578 y Fq(=")p Fn(value)1784
+4608 y Fk(1)1810 4578 y Fq(")i(...)f Fn(attribute)2484
+4608 y Fk(n)2509 4578 y Fq(=")p Fn(value)2824 4608 y
+Fk(n)2850 4578 y Fq(")p Fo(>)396 4769 y Fv(Instead)20
+b(of)g Fq(")p Fn(value)1017 4799 y Fk(k)1043 4769 y Fq(")g
+Fv(it)h(is)g(also)g(possible)f(to)g(use)g(single)g(quotes)g(as)h(in)f
+Fq(')p Fn(value)2817 4799 y Fk(k)2843 4769 y Fq(')p Fv(.)g(Note)h(that)
+f(you)f(cannot)g(use)396 4877 y(double)g(quotes)h(literally)g(within)g
+(the)g(v)n(alue)f(of)h(the)g(attrib)n(ute)g(if)h(double)d(quotes)i(are)
+g(the)g(delimiters;)g(the)g(same)p Black 3800 5278 a
+Fr(15)p Black eop
+%%Page: 16 16
+16 15 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(applies)f(to)h(single)f(quotes.)f(Y)-9
+b(ou)20 b(can)g(generally)e(not)i(use)g Fm(<)h Fv(and)e(&)i(as)g
+(characters)e(in)h(attrib)n(ute)g(v)n(alues.)g(It)g(is)396
+687 y(possible)g(to)h(include)e(the)h(paraphrases)e(<,)j(>,)f
+(&,)f(',)g(and)h(")f(\(and)g(an)o(y)g(other)h(reference)e
+(to)j(a)396 795 y(general)e(entity)h(as)h(long)f(as)g(the)h(entity)f
+(is)h(not)e(de\002ned)h(by)f(an)i(e)o(xternal)d(\002le\))j(as)g(well)g
+(as)g()p Fl(n)p Fv(;.)396 944 y(Before)f(you)f(can)h(use)h(an)f
+(attrib)n(ute)g(you)f(must)h(declare)g(it.)g(An)g(A)-9
+b(TTLIST)20 b(declaration)e(looks)i(as)h(follo)n(ws:)396
+1124 y Fo(<)p Fq(!ATTLIST)43 b Fn(element-name)845 1222
+y(attribute-name)f(attribute-type)h(attribute-default)845
+1319 y Fq(...)845 1416 y Fn(attribute-name)f(attribute-type)h
+(attribute-default)396 1513 y Fo(>)396 1704 y Fv(There)20
+b(are)g(a)g(lot)h(of)f(types,)f(b)n(ut)i(most)f(important)f(are:)p
+Black 396 2061 a Ft(\225)p Black 60 w Fq(CDATA)p Fv(:)h(Ev)o(ery)f
+(string)h(is)h(allo)n(wed)f(as)g(attrib)n(ute)g(v)n(alue.)p
+Black 396 2169 a Ft(\225)p Black 60 w Fq(NMTOKEN)p Fv(:)g(Ev)o(ery)f
+(nametok)o(en)f(is)j(allo)n(wed)f(as)g(attrib)n(ute)g(v)n(alue.)g
+(Nametok)o(ens)f(consist)h(\(mainly\))f(of)g(letters,)479
+2277 y(digits,)h(.,)h(:,)f(-,)g(_)h(in)f(arbitrary)f(order)-5
+b(.)p Black 396 2385 a Ft(\225)p Black 60 w Fq(NMTOKENS)p
+Fv(:)20 b(A)g(space-separated)f(list)i(of)f(nametok)o(ens)e(is)k(allo)n
+(wed)d(as)i(attrib)n(ute)f(v)n(alue.)396 2534 y(The)g(most)g
+(interesting)g(def)o(ault)f(declarations)g(are:)p Black
+396 2767 a Ft(\225)p Black 60 w Fq(#REQUIRED)p Fv(:)h(The)f(attrib)n
+(ute)h(must)g(be)h(speci\002ed.)p Black 396 2874 a Ft(\225)p
+Black 60 w Fq(#IMPLIED)p Fv(:)e(The)h(attrib)n(ute)f(can)g(be)h
+(speci\002ed)f(b)n(ut)h(also)g(can)f(be)h(left)g(out.)f(The)g
+(application)g(can)g(\002nd)g(out)h(whether)479 2982
+y(the)g(attrib)n(ute)g(w)o(as)h(present)f(or)g(not.)p
+Black 396 3090 a Ft(\225)p Black 60 w Fq(")p Fn(value)p
+Fq(")g Fv(or)f Fq(')p Fn(value)p Fq(')p Fv(:)h(This)g(particular)e(v)n
+(alue)i(is)g(used)g(as)h(def)o(ault)e(if)h(the)g(attrib)n(ute)g(is)g
+(omitted)g(in)g(the)g(element.)396 3378 y Fj(Example)479
+3525 y Fi(This)f(is)f(a)h(v)n(alid)g(attrib)o(ute)g(declaration)g(for)g
+(element)g(type)h Fh(r)p Fi(:)479 3695 y Fh()479 4137 y Fi(This)19 b(means)g(that)g Fh(x)g
+Fi(is)g(a)g(required)g(attrib)o(ute)f(that)h(cannot)h(be)f(left)g(out,)
+f(while)h Fh(y)g Fi(and)h Fh(z)f Fi(are)g(optional.)g(The)g(XML)g
+(parser)479 4235 y(indicates)h(the)f(application)g(whether)h
+Fh(y)f Fi(is)f(present)i(or)f(not,)f(b)o(ut)h(if)f Fh(z)h
+Fi(is)g(missing)g(the)g(def)o(ault)h(v)n(alue)f("one)h(tw)o(o)f(three")
+h(is)479 4332 y(returned)g(automatically)-5 b(.)479 4470
+y(This)19 b(is)f(a)h(v)n(alid)g(e)o(xample)h(of)f(these)g(attrib)o
+(utes:)479 4641 y Fh()p Black 3798 5278 a Fr(16)p
+Black eop
+%%Page: 17 17
+17 16 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black -2 583 a Fp(1.2.5.)35 b(P)l(ar)n(sed)g(entities)396
+751 y Fv(Elements)20 b(describe)f(the)i(logical)e(structure)h(of)g(the)
+g(document,)e(while)i Fr(entities)g Fv(determine)f(the)h(physical)g
+(structure.)396 859 y(Entities)h(are)f(the)g(pieces)g(of)g(te)o(xt)g
+(the)g(parser)g(operates)f(on,)h(mostly)g(\002les)h(and)f(macros.)f
+(Entities)h(may)g(be)g Fr(par)o(sed)i Fv(in)396 967 y(which)e(case)h
+(the)f(parser)f(reads)h(the)g(te)o(xt)h(and)e(interprets)g(it)i(as)g
+(XML)g(markup,)d(or)i Fr(unpar)o(sed)h Fv(which)e(simply)h(means)396
+1075 y(that)h(the)f(data)g(of)g(the)g(entity)g(has)g(a)h(foreign)d
+(format)h(\(e.g.)h(a)g(GIF)h(icon\).)396 1224 y(If)f(the)g(parsed)f
+(entity)g(is)i(going)e(to)h(be)g(used)f(as)i(part)e(of)h(the)g(DTD,)g
+(it)g(is)h(called)f(a)g Fr(par)o(ameter)f(entity)p Fv(.)h(Y)-9
+b(ou)19 b(can)h(declare)396 1332 y(a)h(parameter)e(entity)g(with)i(a)f
+(\002x)o(ed)g(te)o(xt)g(as)h(content)e(by:)396 1512 y
+Fo(<)p Fq(!ENTITY)44 b(\045)g Fn(name)g Fq(")p Fn(value)p
+Fq(")p Fo(>)396 1703 y Fv(W)m(ithin)20 b(the)h(DTD,)f(you)f(can)h
+Fr(r)m(efer)h(to)f Fv(this)h(entity)-5 b(,)19 b(i.e.)i(read)e(the)h(te)
+o(xt)g(of)g(the)h(entity)-5 b(,)19 b(by:)396 1883 y Fq(\045)p
+Fn(name)p Fq(;)396 2074 y Fv(Such)h(entities)h(beha)n(v)o(e)e(lik)o(e)h
+(macros,)f(i.e.)i(when)e(the)o(y)h(are)g(referred)e(to,)i(the)g(macro)g
+(te)o(xt)g(is)h(inserted)e(and)h(read)396 2182 y(instead)g(of)g(the)g
+(original)f(te)o(xt.)396 2478 y Fj(Example)479 2625 y
+Fi(F)o(or)g(e)o(xample,)g(you)h(can)f(declare)h(tw)o(o)f(elements)g
+(with)f(the)h(same)h(content)f(model)h(by:)479 2795 y
+Fh()479
+2882 y()479 2970 y()396 3202 y Fv(If)20 b(the)h(contents)e(of)h(the)g
+(entity)g(are)g(gi)n(v)o(en)f(as)i(string)f(constant,)f(the)h(entity)g
+(is)h(called)f(an)g Fr(internal)g Fv(entity)-5 b(.)19
+b(It)i(is)g(also)396 3310 y(possible)f(to)h(name)e(a)i(\002le)g(to)f
+(be)g(used)g(as)h(content)e(\(an)h Fr(e)n(xternal)g Fv(entity\):)396
+3490 y Fo(<)p Fq(!ENTITY)44 b(\045)g Fn(name)g Fq(SYSTEM)g(")p
+Fn(file)g(name)p Fq(")p Fo(>)396 3681 y Fv(There)20 b(are)g(some)g
+(restrictions)f(for)h(parameter)f(entities:)p Black 396
+4038 a Ft(\225)p Black 60 w Fv(If)h(the)h(internal)e(parameter)g
+(entity)g(contains)h(the)g(\002rst)h(tok)o(en)e(of)h(a)h(declaration)e
+(\(i.e.)g Fo(<)p Fq(!)p Fv(\),)h(it)h(must)f(also)h(contain)479
+4146 y(the)f(last)i(tok)o(en)d(of)h(the)g(declaration,)e(i.e.)j(the)f
+Fo(>)p Fv(.)g(This)g(means)g(that)h(the)f(entity)g(either)g(contains)f
+(a)i(whole)e(number)479 4254 y(of)h(complete)f(declarations,)g(or)h
+(some)g(te)o(xt)g(from)f(the)h(middle)g(of)g(one)f(declaration.)479
+4404 y Fr(Ille)m(gal:)479 4542 y Fq(">)479 4639 y()j Fv(is)h(contained)e(in)h(the)h(entity)e Fq(e)p
+Fv(.)p Black 3797 5278 a Fr(17)p Black eop
+%%Page: 18 18
+18 17 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black Black 396 579 a Ft(\225)p Black 60 w Fv(If)f(the)h(internal)e
+(parameter)g(entity)g(contains)h(a)h(left)f(paranthesis,)f(it)i(must)f
+(also)h(contain)e(the)h(corresponding)d(right)479 687
+y(paranthesis.)479 836 y Fr(Ille)m(gal:)479 975 y Fq()479 1072 y()479 1222 y Fv(Because)21 b Fq(\()f Fv(is)h(contained)e(in)h
+(the)g(entity)g Fq(e)p Fv(,)h(and)e(the)i(corresponding)16
+b Fq(\))21 b Fv(is)g(contained)e(in)h(the)g(main)g(entity)-5
+b(.)p Black 396 1371 a Ft(\225)p Black 60 w Fv(When)20
+b(reading)e(te)o(xt)i(from)f(an)g(entity)-5 b(,)19 b(the)h(parser)f
+(automatically)f(inserts)i(one)g(space)f(character)g(before)f(the)i
+(entity)479 1479 y(te)o(xt)g(and)g(one)g(space)g(character)f(after)h
+(the)g(entity)g(te)o(xt.)f(Ho)n(we)n(v)o(er)m(,)f(this)j(rule)f(is)h
+(not)f(applied)f(within)h(the)g(de\002nition)479 1587
+y(of)g(another)f(entity)-5 b(.)479 1736 y Fr(Le)m(gal:)479
+1875 y Fq()479 1972
+y()479 2121
+y Fv(Because)21 b Fq(\045suffix;)e Fv(is)i(referenced)d(within)i(the)g
+(de\002nition)f(te)o(xt)h(for)g Fq(iconfile)p Fv(,)f(no)h(additional)f
+(spaces)h(are)479 2229 y(added.)479 2379 y Fr(Ille)m(gal:)479
+2517 y Fq()479 2615
+y()479 2764 y Fv(Because)21
+b Fq(\045suffix;)e Fv(is)i(referenced)d(outside)i(the)g(de\002nition)f
+(te)o(xt)h(of)g(another)f(entity)-5 b(,)19 b(the)h(parser)g(replaces)
+479 2872 y Fq(\045suffix;)g Fv(by)f Fn(space)p Fq(test)p
+Fn(space)p Fv(.)479 3021 y Fr(Ille)m(gal:)479 3160 y
+Fq()479
+3257 y()479 3407 y Fv(Because)21
+b(there)e(is)j(a)e(whitespace)g(between)f Fq(\))i Fv(and)e
+Fq(*)p Fv(,)i(which)e(is)i(ille)o(gal.)p Black 396 3556
+a Ft(\225)p Black 60 w Fv(An)f(e)o(xternal)f(parameter)g(entity)h(must)
+g(al)o(w)o(ays)h(consist)f(of)g(a)h(whole)e(number)g(of)h(complete)f
+(declarations.)p Black 396 3664 a Ft(\225)p Black 60
+w Fv(In)h(the)g(internal)g(subset)g(of)g(the)g(DTD,)g(a)h(reference)d
+(to)j(a)f(parameter)f(entity)h(\(internal)f(or)h(e)o(xternal\))e(is)k
+(only)479 3772 y(allo)n(wed)e(at)h(positions)e(where)h(a)g(ne)n(w)g
+(declaration)f(can)h(start.)396 3963 y(If)g(the)f(parsed)g(entity)g(is)
+h(going)e(to)i(be)f(used)g(in)h(the)f(document)e(instance,)i(it)h(is)h
+(called)e(a)h Fr(g)o(ener)o(al)e(entity)p Fv(.)h(Such)g(entities)396
+4071 y(can)h(be)g(used)g(as)h(abbre)n(viations)d(for)i(frequent)e
+(phrases,)i(or)g(to)g(include)f(e)o(xternal)g(\002les.)i(Internal)e
+(general)g(entities)i(are)396 4179 y(declared)e(as)i(follo)n(ws:)396
+4359 y Fo(<)p Fq(!ENTITY)44 b Fn(name)g Fq(")p Fn(value)p
+Fq(")p Fo(>)396 4550 y Fv(External)19 b(general)g(entities)i(are)f
+(declared)f(this)i(w)o(ay:)396 4730 y Fo(<)p Fq(!ENTITY)44
+b Fn(name)g Fq(SYSTEM)g(")p Fn(file)g(name)p Fq(")p Fo(>)p
+Black 3800 5278 a Fr(18)p Black eop
+%%Page: 19 19
+19 18 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(References)f(to)g(general)f(entities)i(are)f
+(written)g(as:)396 759 y Fq(&)p Fn(name)p Fq(;)396 950
+y Fv(The)g(main)g(dif)n(ference)e(between)h(parameter)g(and)h(general)f
+(entities)h(is)i(that)e(the)g(former)f(are)h(only)f(recognized)f(in)j
+(the)396 1058 y(DTD)g(and)e(that)i(the)f(latter)g(are)g(only)g
+(recognized)e(in)i(the)g(document)e(instance.)i(As)h(the)f(DTD)g(is)i
+(parsed)d(before)g(the)396 1166 y(document,)f(the)i(parameter)f
+(entities)i(are)f(e)o(xpanded)d(\002rst;)k(for)f(e)o(xample)f(it)i(is)g
+(possible)f(to)g(use)h(the)f(content)f(of)h(a)396 1274
+y(parameter)f(entity)h(as)h(the)f(name)g(of)f(a)i(general)e(entity:)h
+Fq(&\045name;;)2557 1241 y Ff(1)2580 1274 y Fv(.)396
+1423 y(General)g(entities)g(must)h(respect)e(the)i(element)e(hierarchy)
+-5 b(.)17 b(This)k(means)f(that)g(there)g(must)g(be)g(an)g(end)g(tag)g
+(for)g(e)n(v)o(ery)396 1531 y(start)h(tag)f(in)h(the)f(entity)g(v)n
+(alue,)f(and)h(that)g(end)f(tags)i(without)e(corresponding)e(start)k
+(tags)f(are)g(not)g(allo)n(wed.)396 1777 y Fj(Example)479
+1924 y Fi(If)f(the)f(author)i(of)f(a)f(document)j(changes)f(sometimes,)
+f(it)f(is)g(w)o(orthwhile)h(to)g(set)f(up)i(a)e(general)i(entity)e
+(containing)i(the)f(names)479 2021 y(of)g(the)g(authors.)h(If)e(the)h
+(author)h(changes,)g(you)f(need)h(only)g(to)e(change)j(the)e
+(de\002nition)g(of)g(the)g(entity)-5 b(,)18 b(and)i(do)f(not)h(need)f
+(to)479 2118 y(check)h(all)f(occurrences)h(of)f(authors')h(names:)479
+2289 y Fh()479
+2469 y Fi(In)19 b(the)g(document)i(te)o(xt,)d(you)i(can)f(no)n(w)h
+(refer)e(to)h(the)g(author)h(names)f(by)h(writing)e Fh(&authors;)p
+Fi(.)479 2607 y Fe(Ille)m(gal:)h Fi(The)g(follo)n(wing)g(tw)o(o)g
+(entities)g(are)g(ille)o(gal)f(because)i(the)f(elements)g(in)g(the)g
+(de\002nition)g(do)g(not)h(nest)f(properly:)479 2778
+y Fh()q(">)479 2865 y(">)396 3139 y Fv(Earlier)20 b(in)g(this)h(introduction)d(we)i
+(e)o(xplained)e(that)j(there)e(are)i(substitutes)f(for)g(reserv)o(ed)e
+(characters:)i(<,)g(>,)396 3247 y(&,)f(',)h(and)f
+(".)g(These)h(are)g(simply)g(prede\002ned)e(general)h(entities;)i
+(note)f(that)g(the)o(y)g(are)g(the)g(only)396 3355 y(prede\002ned)e
+(entities.)j(It)f(is)h(allo)n(wed)f(to)g(de\002ne)g(these)g(entities)h
+(again)e(as)i(long)e(as)i(the)f(meaning)f(is)i(unchanged.)-2
+3725 y Fp(1.2.6.)35 b(Notations)g(and)e(unpar)n(sed)i(entities)396
+3892 y Fv(Unparsed)19 b(entities)i(ha)n(v)o(e)e(a)i(foreign)d(format)i
+(and)f(can)h(thus)g(not)g(be)g(read)g(by)g(the)g(XML)g(parser)-5
+b(.)20 b(Unparsed)f(entities)396 4000 y(are)h(al)o(w)o(ays)h(e)o
+(xternal.)e(The)h(format)f(of)h(an)g(unparsed)e(entity)i(must)g(ha)n(v)
+o(e)g(been)f(declared,)g(such)h(a)h(format)e(is)i(called)f(a)396
+4108 y Fr(notation)p Fv(.)f(The)g(entity)h(can)g(then)g(be)g(declared)f
+(by)h(referring)e(to)i(this)h(notation.)e(As)i(unparsed)d(entities)j
+(do)f(not)396 4216 y(contain)f(XML)i(te)o(xt,)e(it)i(is)h(not)d
+(possible)h(to)h(include)e(them)h(directly)f(into)h(the)g(document;)e
+(you)i(can)g(only)f(declare)396 4324 y(attrib)n(utes)h(such)g(that)h
+(names)e(of)h(unparsed)f(entities)h(are)h(acceptable)e(v)n(alues.)396
+4474 y(As)i(you)f(can)g(see,)g(unparsed)f(entities)h(are)g(too)g
+(complicated)f(in)h(order)f(to)h(ha)n(v)o(e)g(an)o(y)f(purpose.)g(It)h
+(is)h(almost)f(al)o(w)o(ays)396 4581 y(better)g(to)g(simply)g(pass)h
+(the)f(name)g(of)g(the)g(data)g(\002le)h(as)g(normal)e(attrib)n(ute)g
+(v)n(alue,)h(and)f(let)i(the)f(application)f(recognize)396
+4689 y(and)h(process)g(the)g(foreign)e(format.)p Black
+3800 5278 a Fr(19)p Black eop
+%%Page: 20 20
+20 19 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black -2 597 a Fx(1.3.)39 b(A)g(complete)f(e)n(xample:)g(The)h
+Fd(readme)k Fx(DTD)396 777 y Fv(The)20 b(reason)g(for)f
+Fr(r)m(eadme)h Fv(w)o(as)h(that)f(I)g(often)g(wrote)g(tw)o(o)g(v)o
+(ersions)f(of)h(\002les)h(such)f(as)h(README)g(and)e(INST)-8
+b(ALL)396 885 y(which)20 b(e)o(xplain)f(aspects)h(of)g(a)h(distrib)n
+(uted)e(softw)o(are)h(archi)n(v)o(e;)f(one)g(v)o(ersion)g(w)o(as)i
+(ASCII-formatted,)d(the)i(other)g(w)o(as)396 993 y(written)g(in)h
+(HTML.)e(Maintaining)g(both)g(v)o(ersions)h(means)f(double)g(amount)g
+(of)h(w)o(ork,)f(and)h(changes)f(of)h(one)f(v)o(ersion)396
+1101 y(may)h(be)g(for)o(gotten)e(in)i(the)g(other)f(v)o(ersion.)g(T)-7
+b(o)20 b(impro)o(v)o(e)e(this)j(situation)e(I)i(in)m(v)o(ented)d(the)i
+Fr(r)m(eadme)g Fv(DTD)g(which)f(allo)n(ws)396 1209 y(me)h(to)h
+(maintain)e(only)h(one)f(source)h(written)g(as)g(XML)h(document,)d(and)
+h(to)i(generate)e(the)h(ASCII)g(and)g(the)g(HTML)396
+1317 y(v)o(ersion)f(from)g(it.)396 1466 y(In)h(this)h(section,)f(I)g(e)
+o(xplain)f(only)g(the)i(DTD.)f(The)f Fr(r)m(eadme)h Fv(DTD)h(is)g
+(contained)d(in)j(the)f(PXP)h(distrib)n(ution)e(together)396
+1574 y(with)i(the)f(tw)o(o)g(con)m(v)o(erters)e(to)j(produce)d(ASCII)i
+(and)g(HTML.)g(Another)e(section)i(of)g(this)h(manual)e(describes)h
+(the)396 1682 y(HTML)g(con)m(v)o(erter)-5 b(.)396 1831
+y(The)20 b(documents)f(ha)n(v)o(e)g(a)i(simple)f(structure:)f(There)h
+(are)g(up)g(to)g(three)g(le)n(v)o(els)g(of)g(nested)g(sections,)g
+(paragraphs,)d(item)396 1939 y(lists,)22 b(footnotes,)c(hyperlinks,)g
+(and)h(te)o(xt)h(emphasis.)g(The)g(outermost)f(element)g(has)i(usually)
+e(the)h(type)g Fq(readme)p Fv(,)g(it)h(is)396 2047 y(declared)e(by)396
+2228 y Fq()396 2325
+y()396
+2613 y Fv(This)21 b(means)f(that)g(this)h(element)e(contains)h(one)f
+(or)h(more)f(sections)i(of)f(the)g(\002rst)h(le)n(v)o(el)f(\(element)f
+(type)h Fq(sect1)p Fv(\),)f(and)396 2721 y(that)i(the)f(element)f(has)i
+(a)f(required)f(attrib)n(ute)h Fq(title)f Fv(containing)g(character)g
+(data)h(\(CD)m(A)-9 b(T)h(A\).)19 b(Note)h(that)h Fq(readme)396
+2829 y Fv(elements)f(must)g(not)g(contain)f(te)o(xt)h(data.)396
+2978 y(The)g(three)g(le)n(v)o(els)g(of)g(sections)g(are)g(declared)f
+(as)i(follo)n(ws:)396 3158 y Fq()396 3352 y()396 3547 y()396 3738 y Fv(Ev)o(ery)19 b(section)h(has)g(a)h
+Fq(title)f Fv(element)g(as)g(\002rst)h(subelement.)e(After)h(the)g
+(title)h(an)f(arbitrary)f(b)n(ut)h(non-empty)396 3846
+y(sequence)f(of)h(inner)g(sections,)g(paragraphs)e(and)h(item)i(lists)g
+(follo)n(ws.)f(Note)g(that)g(the)g(inner)g(sections)g(must)g(belong)f
+(to)396 3954 y(the)h(ne)o(xt)g(higher)f(section)h(le)n(v)o(el;)g
+Fq(sect3)g Fv(elements)f(must)i(not)f(contain)f(inner)g(sections)h
+(because)g(there)g(is)h(no)e(ne)o(xt)396 4061 y(higher)g(le)n(v)o(el.)
+396 4211 y(Ob)o(viously)-5 b(,)18 b(all)j(three)f(declarations)e(allo)n
+(w)j(paragraphs)d(\()p Fq(p)p Fv(\))h(and)h(item)g(lists)i(\()p
+Fq(ul)p Fv(\).)e(The)f(de\002nition)g(can)h(be)396 4319
+y(simpli\002ed)g(at)h(this)g(point)e(by)h(using)f(a)i(parameter)e
+(entity:)396 4499 y Fq()396
+4693 y()p
+Black 3800 5278 a Fr(20)p Black eop
+%%Page: 21 21
+21 20 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fq()396 773 y()396 964 y Fv(Here,)20 b(the)g(entity)g
+Fq(p.like)g Fv(is)h(nothing)e(b)n(ut)h(a)g(macro)g(abbre)n(viating)d
+(the)j(same)h(sequence)e(of)h(declarations;)f(if)h(ne)n(w)396
+1072 y(elements)f(on)h(the)f(same)h(le)n(v)o(el)f(as)h
+Fq(p)g Fv(and)f Fq(ul)h Fv(are)f(later)h(added,)e(it)i(is)h(suf)n
+(\002cient)e(only)f(to)i(change)e(the)i(entity)f(de\002nition.)396
+1180 y(Note)h(that)h(there)e(are)i(some)f(restrictions)f(on)h(the)g
+(usage)g(of)g(entities)h(in)f(this)h(conte)o(xt;)e(most)h(important,)e
+(entities)396 1288 y(containing)h(a)h(left)h(paranthesis)e(must)h(also)
+h(contain)e(the)h(corresponding)d(right)i(paranthesis.)396
+1437 y(Note)h(that)h(the)f(entity)g Fq(p.like)g Fv(is)h(a)f
+Fr(par)o(ameter)i Fv(entity)-5 b(,)19 b(i.e.)h(the)g(ENTITY)g
+(declaration)e(contains)i(a)g(percent)f(sign,)396 1545
+y(and)h(the)g(entity)g(is)h(referred)e(to)h(by)g Fq(\045p.like;)p
+Fv(.)f(This)h(kind)g(of)f(entity)h(must)h(be)f(used)g(to)g(abbre)n
+(viate)e(parts)j(of)f(the)396 1653 y(DTD;)g(the)g Fr(g)o(ener)o(al)f
+Fv(entities)h(declared)e(without)h(percent)g(sign)h(and)f(referred)f
+(to)i(as)g Fq(&name;)f Fv(are)h(not)f(allo)n(wed)g(in)h(this)396
+1761 y(conte)o(xt.)396 1911 y(The)g Fq(title)g Fv(element)g
+(speci\002es)g(the)h(title)f(of)g(the)h(section)f(in)g(which)g(it)g
+(occurs.)g(The)f(title)i(is)h(gi)n(v)o(en)c(as)j(character)396
+2019 y(data,)f(optionally)f(interspersed)f(with)j(line)f(breaks)g(\()p
+Fq(br)p Fv(\):)396 2199 y Fq()
+396 2390 y Fv(Compared)19 b(with)h(the)g Fq(title)g Fr(attrib)n(ute)g
+Fv(of)g(the)h Fq(readme)e Fv(element,)h(this)g(element)g(allo)n(ws)g
+(inner)g(markup)e(\(i.e.)i Fq(br)p Fv(\))396 2498 y(while)g(attrib)n
+(ute)g(v)n(alues)g(do)g(not:)g(It)g(is)h(an)g(error)e(if)h(an)g(attrib)
+n(ute)g(v)n(alue)g(contains)f(the)h(left)h(angle)e(brack)o(et)g
+Fm(<)i Fv(literally)396 2605 y(such)f(that)g(it)h(is)h(impossible)d(to)
+h(include)g(inner)f(elements.)396 2755 y(The)h(paragraph)e(element)h
+Fq(p)i Fv(has)f(a)h(structure)e(similar)i(to)f Fq(title)p
+Fv(,)g(b)n(ut)g(it)h(allo)n(ws)f(more)g(inner)f(elements:)396
+2935 y Fq()396
+3129 y()396 3320
+y Fv(Line)20 b(breaks)g(do)f(not)h(ha)n(v)o(e)g(inner)f(structure,)g
+(so)i(the)o(y)e(are)h(declared)f(as)i(being)e(empty:)396
+3500 y Fq()396 3691 y Fv(This)21
+b(means)f(that)g(really)g(nothing)e(is)j(allo)n(wed)f(within)g
+Fq(br)p Fv(;)g(you)f(must)i(al)o(w)o(ays)f(write)h Fq( )e
+Fv(or)h(abbre)n(viated)396 3799 y Fq( )p Fv(.)396
+3949 y(Code)g(samples)h(should)e(be)h(mark)o(ed)f(up)h(by)f(the)h
+Fq(code)h Fv(tag;)f(emphasized)f(te)o(xt)h(can)g(be)g(indicated)f(by)h
+Fq(em)p Fv(:)396 4129 y Fq()396
+4323 y()396 4514
+y Fv(That)20 b Fq(code)g Fv(elements)g(are)g(not)g(allo)n(wed)g(to)g
+(contain)f(further)g(markup)f(while)i Fq(em)h Fv(elements)f(do)g(is)h
+(a)f(design)g(decision)396 4622 y(by)g(the)g(author)f(of)h(the)g(DTD.)
+396 4772 y(Unordered)e(lists)k(simply)d(consists)i(of)f(one)g(or)g
+(more)f(list)i(items,)g(and)e(a)i(list)g(item)g(may)e(contain)g
+(paragraph-le)n(v)o(el)396 4879 y(material:)p Black 3800
+5278 a Fr(21)p Black eop
+%%Page: 22 22
+22 21 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fq()396 773
+y()396 964 y Fv(F)o(ootnotes)19
+b(are)h(described)f(by)h(the)g(te)o(xt)g(of)g(the)g(note;)g(this)h(te)o
+(xt)f(may)g(contain)f(te)o(xt-le)n(v)o(el)g(markup.)f(There)h(is)i(no)
+396 1072 y(mechanism)e(to)i(describe)e(the)h(numbering)e(scheme)h(of)h
+(footnotes,)f(or)h(to)g(specify)g(ho)n(w)f(footnote)g(references)f(are)
+396 1180 y(printed.)396 1360 y Fq()396 1551 y Fv(Hyperlinks)19
+b(are)h(written)g(as)h(in)f(HTML.)g(The)g(anchor)f(tag)h(contains)f
+(the)h(te)o(xt)g(describing)f(where)h(the)g(link)g(points)g(to,)396
+1659 y(and)g(the)g Fq(href)g Fv(attrib)n(ute)g(is)h(the)f(pointer)f
+(\(as)i(URL\).)f(There)f(is)j(no)d(w)o(ay)i(to)f(describe)f(locations)h
+(of)g("hash)g(marks".)f(If)396 1767 y(the)h(link)g(refers)g(to)g
+(another)f Fr(r)m(eadme)h Fv(document,)e(the)i(attrib)n(ute)g
+Fq(readmeref)f Fv(should)g(be)h(used)g(instead)g(of)g
+Fq(href)p Fv(.)396 1875 y(The)g(reason)g(is)h(that)f(the)g(con)m(v)o
+(erted)e(document)g(has)i(usually)g(a)h(dif)n(ferent)d(system)i
+(identi\002er)g(\(\002le)h(name\),)d(and)i(the)396 1983
+y(link)g(to)h(a)f(con)m(v)o(erted)e(document)g(must)i(be)g(con)m(v)o
+(erted,)e(too.)396 2163 y Fq()396
+2260 y()396
+2742 y Fv(Note)20 b(that)h(although)d(it)j(is)g(only)e(sensible)i(to)f
+(specify)g(one)f(of)h(the)g(tw)o(o)h(attrib)n(utes,)f(the)g(DTD)g(has)h
+(no)e(means)h(to)396 2850 y(e)o(xpress)g(this)g(restriction.)396
+3000 y(So)h(f)o(ar)f(the)g(DTD.)g(Finally)-5 b(,)19 b(here)h(is)h(a)g
+(document)d(for)i(it:)396 3180 y Fq()396 3277 y()396 3374 y()396 3471 y()486 3569 y
+(Usage )486 3666 y()576 3763 y(The)g(readme )e
+(converter)i(is)g(invoked)g(on)g(the)h(command)e(line)h(by:)486
+3860 y(
)486 3957 y()576 4054 y(readme)e([)j(-text)f(|)h
+(-html)f(])g(input.xml
)486 4151 y(
)486 4248
+y()576 4346 y(Here)g(a)g(list)h(of)f(options:)486
+4443 y(
)486 4540 y()486 1162
+y()576 1259 y(The)f(input)g(file)g(must)g(be)h(given)f(on)g(the)h
+(command)e(line.)h(The)h(converted)e(output)h(is)576
+1356 y(printed)f(to)i(stdout .)486 1453 y(
)396
+1550 y( )396 1647 y()486 1745 y(Author )486
+1842 y()576 1939 y(The)f(program)g(has)g(been)g(written)g(by)576
+2036 y(Ge)
+o(rd)39 b(Stolpmann .)486 2133 y(
)396 2230 y( )396
+2327 y( )-2 2746 y Fx(Notes)p Black 396 2926
+a Fv(1.)p Black 70 w(This)20 b(construct)g(is)h(only)e(allo)n(wed)h
+(within)g(the)g(de\002nition)f(of)h(another)e(entity;)i(otherwise)g(e)o
+(xtra)f(spaces)i(w)o(ould)529 3034 y(be)f(added)f(\(as)i(e)o(xplained)d
+(abo)o(v)o(e\).)g(Such)i(indirection)e(is)j(not)f(recommended.)529
+3172 y Fi(Complete)f(e)o(xample:)529 3343 y Fh()243 b()529
+3430 y()529
+3518 y()529
+3605 y()529
+3785 y Fi(Y)-8 b(ou)19 b(can)h(no)n(w)f(write)f Fh(&text;)j
+Fi(in)e(the)g(document)h(instance,)f(and)h(depending)h(on)e(the)g(v)n
+(alue)g(of)g Fh(variant)i Fi(either)e Fh(text-a)i Fi(or)529
+3882 y Fh(text-b)g Fi(is)d(inserted.)p Black 3800 5278
+a Fr(23)p Black eop
+%%Page: 24 24
+24 23 bop Black Black -2 621 a Fs(Chapter)48 b(2.)f(Using)i(PXP)-2
+1055 y Fx(2.1.)39 b(V)-9 b(alidation)396 1235 y Fv(The)20
+b(parser)g(can)g(be)g(used)g(to)g Fr(validate)f Fv(a)i(document.)d
+(This)i(means)g(that)g(all)h(the)f(constraints)g(that)g(must)g(hold)g
+(for)f(a)396 1343 y(v)n(alid)h(document)e(are)i(actually)g(check)o(ed.)
+f(V)-9 b(alidation)19 b(is)i(the)f(def)o(ault)f(mode)h(of)g(PXP,)g
+(i.e.)h(e)n(v)o(ery)d(document)h(is)396 1451 y(v)n(alidated)g(while)i
+(it)f(is)i(being)d(parsed.)396 1600 y(In)h(the)g Fq(examples)g
+Fv(directory)e(of)i(the)g(distrib)n(ution)f(you)h(\002nd)g(the)g
+Fq(pxpvalidate)f Fv(application.)f(It)j(is)g(in)m(v)n(ok)o(ed)d(in)j
+(the)396 1708 y(follo)n(wing)e(w)o(ay:)396 1888 y Fq(pxpvalidate)43
+b([)i(-wf)f(])h Fn(file)p Fq(...)396 2079 y Fv(The)20
+b(\002les)h(mentioned)e(on)g(the)i(command)d(line)i(are)g(v)n
+(alidated,)f(and)h(e)n(v)o(ery)e(w)o(arning)h(and)h(e)n(v)o(ery)f
+(error)g(messages)h(are)396 2187 y(printed)f(to)i(stderr)-5
+b(.)396 2337 y(The)20 b(-wf)g(switch)h(modi\002es)e(the)i(beha)n(viour)
+d(such)i(that)g(a)h(well-formedness)d(parser)h(is)i(simulated.)f(In)g
+(this)g(mode,)f(the)396 2445 y(ELEMENT)-6 b(,)19 b(A)-9
+b(TTLIST)j(,)19 b(and)g(NO)m(T)-8 b(A)f(TION)20 b(declarations)f(of)h
+(the)g(DTD)g(are)g(ignored,)e(and)i(only)f(the)i(ENTITY)396
+2553 y(declarations)e(will)i(tak)o(e)f(ef)n(fect.)g(This)g(mode)f(is)i
+(intended)e(for)h(documents)e(lacking)h(a)i(DTD.)f(Please)h(note)f
+(that)g(the)396 2661 y(parser)g(still)h(scans)g(the)f(DTD)g(fully)g
+(and)g(will)h(report)e(all)h(errors)g(in)g(the)g(DTD;)h(such)f(checks)f
+(are)h(not)g(required)f(by)g(a)396 2769 y(well-formedness)f(parser)-5
+b(.)396 2918 y(The)20 b Fq(pxpvalidate)f Fv(application)g(is)i(the)f
+(simplest)h(sensible)f(program)e(using)i(PXP,)g(you)g(may)f(consider)g
+(it)i(as)396 3026 y("hello)f(w)o(orld")f(program.)-2
+3445 y Fx(2.2.)39 b(Ho)n(w)g(to)g(par)n(se)f(a)i(document)d(fr)m(om)i
+(an)g(application)396 3624 y Fv(Let)21 b(me)f(\002rst)h(gi)n(v)o(e)e(a)
+i(rough)d(o)o(v)o(ervie)n(w)g(of)i(the)h(object)e(model)g(of)h(the)h
+(parser)-5 b(.)19 b(The)h(follo)n(wing)f(items)h(are)h(represented)396
+3732 y(by)f(objects:)p Black 396 4055 a Ft(\225)p Black
+60 w Fr(Documents:)f Fv(The)h(document)e(representation)g(is)j(more)e
+(or)h(less)h(the)f(anchor)f(for)g(the)h(application;)f(all)i(accesses)g
+(to)479 4163 y(the)f(parsed)g(entities)h(start)f(here.)g(It)g(is)h
+(described)e(by)h(the)g(class)h Fq(document)f Fv(contained)e(in)j(the)f
+(module)479 4271 y Fq(Pxp_document)p Fv(.)f(Y)-9 b(ou)19
+b(can)h(get)h(some)f(global)f(information,)e(such)j(as)h(the)f(XML)h
+(declaration)d(the)j(document)479 4379 y(be)o(gins)f(with,)g(the)g(DTD)
+g(of)g(the)g(document,)e(global)i(processing)e(instructions,)h(and)h
+(most)g(important,)f(the)479 4487 y(document)f(tree.)p
+Black 396 4595 a Ft(\225)p Black 60 w Fr(The)j(contents)e(of)h
+(documents:)f Fv(The)h(contents)f(ha)n(v)o(e)h(the)g(structure)f(of)h
+(a)h(tree:)f(Elements)g(contain)f(other)g(elements)479
+4703 y(and)h(te)o(xt)744 4670 y Ff(1)768 4703 y Fv(.)h(The)e(common)g
+(type)h(to)g(represent)f(both)g(kinds)h(of)g(content)f(is)i
+Fq(node)f Fv(which)g(is)h(a)g(class)g(type)e(that)479
+4811 y(uni\002es)h(the)h(properties)d(of)i(elements)g(and)g(character)f
+(data.)h(Ev)o(ery)e(node)i(has)g(a)h(list)g(of)f(children)f(\(which)g
+(is)i(empty)p Black 3800 5278 a Fr(24)p Black eop
+%%Page: 25 25
+25 24 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 479 579 a Fv(if)h(the)f(element)g(is)h(empty)e(or)h(the)g(node)f
+(represents)h(te)o(xt\);)f(nodes)h(may)g(ha)n(v)o(e)f(attrib)n(utes;)h
+(nodes)g(ha)n(v)o(e)f(al)o(w)o(ays)i(te)o(xt)479 687
+y(contents.)d(There)g(are)g(tw)o(o)h(implementations)e(of)h
+Fq(node)p Fv(,)h(the)f(class)i Fq(element_impl)d Fv(for)h(elements,)g
+(and)g(the)h(class)479 795 y Fq(data_impl)h Fv(for)f(te)o(xt)h(data.)g
+(Y)-9 b(ou)20 b(\002nd)f(these)i(classes)g(and)f(class)h(types)f(in)g
+(the)g(module)f Fq(Pxp_document)p Fv(,)g(too.)479 944
+y(Note)h(that)h(attrib)n(ute)f(lists)h(are)f(represented)f(by)g
+(non-class)h(v)n(alues.)p Black 396 1094 a Ft(\225)p
+Black 60 w Fr(The)h(node)e(e)n(xtension:)g Fv(F)o(or)h(adv)n(anced)e
+(usage,)i(e)n(v)o(ery)e(node)i(of)f(the)i(document)d(may)i(ha)n(v)o(e)f
+(an)h(associated)479 1202 y Fr(e)n(xtension)g Fv(which)g(is)h(simply)f
+(a)g(second)f(object.)h(This)g(object)g(must)g(ha)n(v)o(e)g(the)g
+(three)g(methods)f Fq(clone)p Fv(,)g Fq(node)p Fv(,)h(and)479
+1310 y Fq(set_node)f Fv(as)h(bare)f(minimum,)e(b)n(ut)j(you)e(are)i
+(free)e(to)i(add)f(methods)f(as)i(you)f(w)o(ant.)g(This)g(is)i(the)e
+(preferred)e(w)o(ay)j(to)479 1417 y(add)g(functionality)e(to)i(the)h
+(document)d(tree)1746 1384 y Ff(2)1770 1417 y Fv(.)j(The)e(class)j
+(type)d Fq(extension)h Fv(is)h(de\002ned)e(in)h Fq(Pxp_document)p
+Fv(,)f(too.)p Black 396 1525 a Ft(\225)p Black 60 w Fr(The)i(DTD:)f
+Fv(Sometimes)g(it)h(is)g(necessary)e(to)i(access)f(the)h(DTD)f(of)g(a)h
+(document;)d(the)i(a)n(v)o(erage)f(application)g(does)479
+1633 y(not)h(need)g(this)g(feature.)f(The)h(class)h Fq(dtd)g
+Fv(describes)e(DTDs,)i(and)e(mak)o(es)h(it)h(possible)f(to)h(get)f
+(representations)e(of)479 1741 y(element,)i(entity)-5
+b(,)19 b(and)h(notation)e(declarations)h(as)i(well)g(as)g(processing)e
+(instructions)g(contained)f(in)j(the)f(DTD.)479 1849
+y(This)g(class,)g(and)f Fq(dtd_element)p Fv(,)g Fq(dtd_notation)p
+Fv(,)e(and)i Fq(proc_instruction)f Fv(can)h(be)h(found)e(in)i(the)f
+(module)479 1957 y Fq(Pxp_dtd)p Fv(.)h(There)f(are)h(a)h(couple)e(of)h
+(classes)h(representing)d(dif)n(ferent)h(kinds)g(of)h(entities;)h
+(these)f(can)g(be)g(found)f(in)479 2065 y(the)h(module)f
+Fq(Pxp_entity)p Fv(.)396 2214 y(Additionally)-5 b(,)18
+b(the)i(follo)n(wing)f(modules)g(play)h(a)g(role:)p Black
+396 2447 a Ft(\225)p Black 60 w Fr(Pxp_yacc:)e Fv(Here)i(the)h(main)e
+(parsing)h(functions)e(such)i(as)h Fq(parse_document_entity)c
+Fv(are)k(located.)e(Some)479 2555 y(additional)g(types)h(and)g
+(functions)f(allo)n(w)h(the)g(parser)f(to)i(be)f(con\002gured)e(in)i(a)
+h(non-standard)c(w)o(ay)-5 b(.)p Black 396 2663 a Ft(\225)p
+Black 60 w Fr(Pxp_types:)19 b Fv(This)h(is)h(a)g(collection)e(of)h
+(basic)g(types)g(and)g(e)o(xceptions.)396 2812 y(There)g(are)g(some)g
+(further)e(modules)i(that)g(are)g(needed)f(internally)g(b)n(ut)h(are)g
+(not)g(part)g(of)g(the)g(API.)396 2962 y(Let)h(the)f(document)e(to)i
+(be)h(parsed)e(be)h(stored)g(in)g(a)h(\002le)g(called)f
+Fq(doc.xml)p Fv(.)f(The)h(parsing)f(process)h(is)h(started)f(by)396
+3070 y(calling)g(the)g(function)396 3250 y Fq(val)45
+b(parse_document_entity)c(:)k(config)e(->)i(source)f(->)g('ext)g(spec)h
+(->)f('ext)g(document)396 3441 y Fv(de\002ned)19 b(in)i(the)f(module)f
+Fq(Pxp_yacc)p Fv(.)g(The)h(\002rst)h(ar)o(gument)d(speci\002es)i(some)g
+(global)g(properties)e(of)i(the)g(parser;)g(it)h(is)396
+3549 y(recommended)c(to)j(start)g(with)g(the)g Fq(default_config)p
+Fv(.)e(The)h(second)g(ar)o(gument)e(determines)i(where)g(the)h
+(document)396 3657 y(to)h(be)f(parsed)f(comes)h(from;)f(this)i(may)f
+(be)g(a)g(\002le,)h(a)g(channel,)d(or)i(an)g(entity)g(ID.)g(T)-7
+b(o)21 b(parse)f Fq(doc.xml)p Fv(,)f(it)i(is)g(suf)n(\002cient)396
+3764 y(to)g(pass)f Fq(from_file)44 b("doc.xml")p Fv(.)396
+3914 y(The)20 b(third)g(ar)o(gument)e(passes)i(the)h(object)e
+(speci\002cation)h(to)g(use.)g(Roughly)f(speaking,)g(it)i(determines)e
+(which)g(classes)396 4022 y(implement)g(the)h(node)g(objects)f(of)h
+(which)g(element)g(types,)f(and)h(which)g(e)o(xtensions)f(are)h(to)g
+(be)g(used.)g(The)g Fq('ext)396 4130 y Fv(polymorphic)d(v)n(ariable)i
+(is)j(the)e(type)f(of)h(the)h(e)o(xtension.)d(F)o(or)i(the)g(moment,)f
+(let)i(us)f(simply)g(pass)h Fq(default_spec)d Fv(as)396
+4238 y(this)j(ar)o(gument,)d(and)h(ignore)g(it.)396 4387
+y(So)i(the)f(follo)n(wing)e(e)o(xpression)h(parses)h
+Fq(doc.xml)p Fv(:)396 4567 y Fq(open)44 b(Pxp_yacc)396
+4664 y(let)h(d)f(=)h(parse_document_entity)c(default_config)i
+(\(from_file)g("doc.xml"\))g(de-)396 4762 y(fault_spec)p
+Black 3800 5278 a Fr(25)p Black eop
+%%Page: 26 26
+26 25 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(Note)g(that)h Fq(default_config)d
+Fv(implies)i(that)h(w)o(arnings)e(are)h(collected)g(b)n(ut)g(not)g
+(printed.)e(Errors)h(raise)i(one)f(of)g(the)396 687 y(e)o(xception)f
+(de\002ned)g(in)h Fq(Pxp_types)p Fv(;)f(to)i(get)f(readable)f(errors)g
+(and)h(w)o(arnings)f(catch)h(the)g(e)o(xceptions)f(as)i(follo)n(ws:)396
+867 y Fq(class)44 b(warner)g(=)486 964 y(object)576 1061
+y(method)f(warn)i(w)f(=)665 1158 y(print_endline)f(\("WARNING:)g(")i(^)
+f(w\))486 1256 y(end)396 1353 y(;;)396 1547 y(try)486
+1644 y(let)g(config)g(=)h({)f(default_config)f(with)h(warner)g(=)h(new)
+f(warner)g(})g(in)486 1741 y(let)g(d)h(=)g(parse_document_entity)c
+(config)j(\(from_file)f("doc.xml"\))g(default_spec)486
+1838 y(in)576 1935 y(...)396 2033 y(with)531 2130 y(e)h(->)620
+2227 y(print_endline)f(\(Pxp_types.string_of_exn)e(e\))396
+2418 y Fv(No)n(w)20 b Fq(d)h Fv(is)g(an)f(object)g(of)g(the)g
+Fq(document)f Fv(class.)i(If)f(you)g(w)o(ant)g(the)g(node)f(tree,)h
+(you)g(can)g(get)g(the)g(root)f(element)h(by)396 2598
+y Fq(let)45 b(root)f(=)g(d)h(#)g(root)396 2789 y Fv(and)20
+b(if)g(you)g(w)o(ould)f(rather)h(lik)o(e)g(to)g(access)h(the)f(DTD,)g
+(determine)f(it)i(by)396 2969 y Fq(let)45 b(dtd)f(=)h(d)f(#)h(dtd)396
+3160 y Fv(As)21 b(it)g(is)g(more)f(interesting,)f(let)h(us)h(in)m(v)o
+(estigate)e(the)h(node)f(tree)h(no)n(w)-5 b(.)19 b(Gi)n(v)o(en)g(the)i
+(root)e(element,)g(it)i(is)h(possible)d(to)396 3268 y(recursi)n(v)o
+(ely)f(tra)n(v)o(erse)h(the)h(whole)f(tree.)g(The)g(children)g(of)g(a)h
+(node)f Fq(n)h Fv(are)f(returned)f(by)h(the)h(method)e
+Fq(sub_nodes)p Fv(,)g(and)396 3376 y(the)i(type)g(of)g(a)h(node)e(is)i
+(returned)d(by)i Fq(node_type)p Fv(.)f(This)i(function)d(tra)n(v)o
+(erses)i(the)g(tree,)g(and)g(prints)g(the)g(type)f(of)h(each)396
+3484 y(node:)396 3664 y Fq(let)45 b(rec)f(print_structure)e(n)j(=)486
+3761 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 3858
+y(match)g(ntype)g(with)576 3955 y(T_element)f(name)h(->)665
+4053 y(print_endline)f(\("Element)g(of)i(type)f(")h(^)f(name\);)665
+4150 y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665
+4247 y(List.iter)h(print_structure)e(children)486 4344
+y(|)j(T_data)e(->)665 4441 y(print_endline)g("Data")486
+4538 y(|)i(_)f(->)665 4635 y(\(*)h(Other)f(node)g(types)g(are)g(not)h
+(possible)e(unless)h(the)g(parser)g(is)h(configured)800
+4733 y(differently.)710 4830 y(*\))p Black 3798 5278
+a Fr(26)p Black eop
+%%Page: 27 27
+27 26 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 665 579 a Fq(assert)44 b(false)396 770 y Fv(Y)-9
+b(ou)20 b(can)g(call)g(this)h(function)e(by)396 950 y
+Fq(print_structure)43 b(root)396 1141 y Fv(The)20 b(type)g(returned)e
+(by)i Fq(node_type)f Fv(is)i(either)f Fq(T_element)43
+b(name)21 b Fv(or)e Fq(T_data)p Fv(.)h(The)g Fq(name)g
+Fv(of)g(the)g(element)g(type)396 1249 y(is)h(the)g(string)e(included)g
+(in)i(the)f(angle)f(brack)o(ets.)h(Note)g(that)g(only)f(elements)h(ha)n
+(v)o(e)g(children;)f(data)h(nodes)f(are)h(al)o(w)o(ays)396
+1357 y(lea)n(v)o(es)h(of)e(the)i(tree.)396 1506 y(There)f(are)g(some)g
+(more)f(methods)g(in)i(order)e(to)h(access)h(a)f(parsed)g(node)f(tree:)
+p Black 396 1739 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(parent)p
+Fv(:)19 b(Returns)h(the)h(parent)e(node,)g(or)h(raises)h
+Fq(Not_found)e Fv(if)h(the)g(node)g(is)h(already)e(the)h(root)p
+Black 396 1847 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(root)p
+Fv(:)20 b(Returns)g(the)g(root)g(of)f(the)i(node)e(tree.)p
+Black 396 1955 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(attribute)e(a)p
+Fv(:)21 b(Returns)f(the)g(v)n(alue)f(of)h(the)g(attrib)n(ute)g(with)h
+(name)e Fq(a)p Fv(.)i(The)e(method)g(returns)h(a)g(v)n(alue)g(for)479
+2063 y(e)n(v)o(ery)f Fr(declar)m(ed)j Fv(attrib)n(ute,)d(independently)
+e(of)j(whether)f(the)i(attrib)n(ute)e(instance)h(is)h(de\002ned)e(or)h
+(not.)g(If)g(the)479 2170 y(attrib)n(ute)g(is)h(not)f(declared,)f
+Fq(Not_found)g Fv(will)i(be)f(raised.)g(\(In)f(well-formedness)f(mode,)
+h(e)n(v)o(ery)g(attrib)n(ute)h(is)479 2278 y(considered)f(as)i(being)e
+(implicitly)h(declared)e(with)j(type)f Fq(CDATA)p Fv(.\))479
+2428 y(The)g(follo)n(wing)f(return)g(v)n(alues)g(are)i(possible:)f
+Fq(Value)44 b(s)p Fv(,)20 b Fq(Valuelist)43 b(sl)21 b
+Fv(,)f(and)g Fq(Implied_value)p Fv(.)e(The)i(\002rst)479
+2536 y(tw)o(o)h(v)n(alue)e(types)h(indicate)g(that)g(the)g(attrib)n
+(ute)g(v)n(alue)g(is)h(a)n(v)n(ailable,)e(either)h(because)g(there)f
+(is)i(a)g(de\002nition)479 2644 y Fn(a)p Fq(=")p Fn(value)p
+Fq(")f Fv(in)g(the)g(XML)g(te)o(xt,)g(or)g(because)g(there)f(is)i(a)g
+(def)o(ault)e(v)n(alue)h(\(declared)f(in)h(the)g(DTD\).)g(Only)g(if)g
+(both)479 2752 y(the)g(instance)g(de\002nition)f(and)h(the)g(def)o
+(ault)g(declaration)e(are)i(missing,)g(the)h(latter)f(v)n(alue)f
+Fq(Implied_value)g Fv(will)479 2860 y(be)h(returned.)479
+3009 y(In)g(the)g(DTD,)h(e)n(v)o(ery)d(attrib)n(ute)i(is)h(typed.)e
+(There)h(are)g(single-v)n(alue)e(types)i(\(CD)m(A)-9
+b(T)h(A,)20 b(ID,)g(IDREF)-7 b(,)21 b(ENTITY)-11 b(,)479
+3117 y(NMT)o(OKEN,)19 b(enumerations\),)f(in)i(which)g(case)g(the)h
+(method)d(passes)j Fq(Value)44 b(s)21 b Fv(back,)e(where)h
+Fq(s)g Fv(is)h(the)479 3225 y(normalized)e(string)g(v)n(alue)h(of)g
+(the)g(attrib)n(ute.)g(The)f(other)h(types)g(\(IDREFS,)g(ENTITIES,)f
+(NMT)o(OKENS\))479 3333 y(represent)g(list)j(v)n(alues,)d(and)h(the)g
+(parser)g(splits)h(the)f(XML)g(literal)h(into)e(se)n(v)o(eral)h(tok)o
+(ens)g(and)f(returns)h(these)g(tok)o(ens)479 3441 y(as)h
+Fq(Valuelist)44 b(sl)p Fv(.)479 3590 y(Normalization)19
+b(means)h(that)g(entity)g(references)e(\(the)i Fq(&)p
+Fn(name)p Fq(;)g Fv(tok)o(ens\))f(and)h(character)f(references)479
+3698 y(\()p Fq()p Fn(number)s Fq(;)p Fv(\))g(are)h(replaced)f(by)g
+(the)i(te)o(xt)f(the)o(y)f(represent,)g(and)h(that)g(white)g(space)g
+(characters)f(are)i(con)m(v)o(erted)479 3806 y(into)f(plain)g(spaces.)p
+Black 396 3955 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(data)p
+Fv(:)20 b(Returns)g(the)g(character)f(data)h(contained)f(in)h(the)g
+(node.)f(F)o(or)h(data)g(nodes,)f(the)h(meaning)f(is)i(ob)o(vious)479
+4063 y(as)g(this)g(is)g(the)f(main)g(content)f(of)h(data)g(nodes.)f(F)o
+(or)h(element)g(nodes,)f(this)i(method)e(returns)g(the)h(concatenated)
+479 4171 y(contents)g(of)g(all)g(inner)g(data)g(nodes.)479
+4321 y(Note)g(that)h(entity)f(references)e(included)h(in)h(the)h(te)o
+(xt)f(are)g(resolv)o(ed)f(while)h(the)o(y)f(are)h(being)g(parsed;)f
+(for)h(e)o(xample)479 4429 y(the)g(te)o(xt)h("a)f(<>)g(b")g(will)
+h(be)f(returned)e(as)j("a)g(<>)f(b")g(by)g(this)h(method.)d(Spaces)j
+(of)f(data)g(nodes)f(are)h(al)o(w)o(ays)479 4537 y(preserv)o(ed.)e(Ne)n
+(wlines)j(are)f(preserv)o(ed,)e(b)n(ut)i(al)o(w)o(ays)g(con)m(v)o
+(erted)e(to)i(\\n)h(characters)e(e)n(v)o(en)g(if)i(ne)n(wlines)e(are)i
+(encoded)479 4644 y(as)g(\\r\\n)f(or)g(\\r)-5 b(.)21
+b(Normally)e(you)g(will)i(ne)n(v)o(er)e(see)i(tw)o(o)f(adjacent)f(data)
+i(nodes)e(because)h(the)g(parser)f(collapses)h(all)h(data)479
+4752 y(material)f(at)h(one)e(location)h(into)g(one)f(node.)g(\(Ho)n(we)
+n(v)o(er)m(,)f(if)i(you)g(create)g(your)f(o)n(wn)g(tree)h(or)g
+(transform)f(the)h(parsed)479 4860 y(tree,)g(it)h(is)g(possible)f(to)h
+(ha)n(v)o(e)e(adjacent)h(data)g(nodes.\))p Black 3797
+5278 a Fr(27)p Black eop
+%%Page: 28 28
+28 27 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 479 579 a Fv(Note)g(that)h(elements)f(that)g(do)g
+Fr(not)h Fv(allo)n(w)f(#PCD)m(A)-9 b(T)h(A)20 b(as)h(content)e(will)i
+(not)f(ha)n(v)o(e)g(data)g(nodes)f(as)i(children.)e(This)479
+687 y(means)h(that)g(spaces)h(and)f(ne)n(wlines,)f(the)h(only)g
+(character)f(material)g(allo)n(wed)h(for)g(such)f(elements,)h(are)g
+(silently)479 795 y(dropped.)396 986 y(F)o(or)g(e)o(xample,)e(if)i(the)
+f(task)h(is)h(to)f(print)f(all)h(contents)f(of)g(elements)h(with)f
+(type)h("v)n(aluable")e(whose)h(attrib)n(ute)g("priority")396
+1094 y(is)i("1",)f(this)h(function)d(can)i(help:)396
+1274 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486
+1371 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 1468
+y(match)g(ntype)g(with)576 1565 y(T_element)f("valuable")g(when)h(n)h
+(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665
+1662 y(print_endline)f("Valuable)g(node)h(with)h(priotity)e(1)i
+(found:";)665 1759 y(print_endline)e(\(n)h(#)h(data\))486
+1857 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 1954
+y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665 2051
+y(List.iter)h(print_valuable_prio1)d(children)486 2148
+y(|)k(_)f(->)665 2245 y(assert)g(false)396 2436 y Fv(Y)-9
+b(ou)20 b(can)g(call)g(this)h(function)e(by:)396 2616
+y Fq(print_valuable_prio1)42 b(root)396 2807 y Fv(If)20
+b(you)g(lik)o(e)g(a)h(DSSSL-lik)o(e)f(style,)g(you)g(can)g(mak)o(e)f
+(the)h(function)f Fq(process_children)f Fv(e)o(xplicit:)396
+2987 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486
+3182 y(let)f(process_children)e(n)j(=)576 3279 y(let)f(children)f(=)i
+(n)g(#)f(sub_nodes)g(in)576 3376 y(List.iter)f(print_valuable_prio1)e
+(children)486 3473 y(in)486 3667 y(let)j(ntype)g(=)h(n)g(#)f(node_type)
+g(in)486 3764 y(match)g(ntype)g(with)576 3862 y(T_element)f("valuable")
+g(when)h(n)h(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665
+3959 y(print_endline)f("Valuable)g(node)h(with)h(priority)e(1)i
+(found:";)665 4056 y(print_endline)e(\(n)h(#)h(data\))486
+4153 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 4250
+y(process_children)e(n)486 4347 y(|)j(_)f(->)665 4444
+y(assert)g(false)396 4635 y Fv(So)21 b(f)o(ar)m(,)e(O'Caml)h(is)i(no)n
+(w)d(a)i(simple)f("style-sheet)g(language":)e(Y)-9 b(ou)20
+b(can)g(form)f(a)h(big)g("match")g(e)o(xpression)e(to)396
+4743 y(distinguish)h(between)h(all)h(signi\002cant)e(cases,)i(and)f
+(pro)o(vide)e(dif)n(ferent)g(reactions)i(on)g(dif)n(ferent)e
+(conditions.)h(But)h(this)396 4851 y(technique)f(has)h(limitations;)g
+(the)h("match")e(e)o(xpression)g(tends)h(to)g(get)g(lar)o(ger)f(and)h
+(lar)o(ger)m(,)e(and)i(it)g(is)i(dif)n(\002cult)d(to)i(store)p
+Black 3800 5278 a Fr(28)p Black eop
+%%Page: 29 29
+29 28 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(intermediate)f(v)n(alues)h(as)h(there)e(is)j(only)d
+(one)h(big)f(recursion.)g(Alternati)n(v)o(ely)-5 b(,)18
+b(it)j(is)g(also)f(possible)g(to)h(represent)e(the)396
+687 y(v)n(arious)g(cases)i(as)g(classes,)g(and)f(to)g(use)h(dynamic)d
+(method)h(lookup)g(to)h(\002nd)g(the)g(appropiate)e(class.)j(The)f(ne)o
+(xt)f(section)396 795 y(e)o(xplains)g(this)i(technique)e(in)h(detail.)
+-2 1213 y Fx(2.3.)39 b(Class-based)e(pr)m(ocessing)g(of)j(the)f(node)f
+(tree)396 1393 y Fv(By)21 b(def)o(ault,)e(the)h(parsed)g(node)f(tree)h
+(consists)h(of)f(objects)g(of)g(the)g(same)g(class;)h(this)g(is)g(a)g
+(good)e(design)g(as)i(long)e(as)i(you)396 1501 y(w)o(ant)g(only)e(to)h
+(access)h(selected)f(parts)g(of)g(the)h(document.)c(F)o(or)j(comple)o
+(x)f(transformations,)e(it)k(may)f(be)g(better)g(to)g(use)396
+1609 y(dif)n(ferent)f(classes)i(for)f(objects)g(describing)e(dif)n
+(ferent)h(element)g(types.)396 1758 y(F)o(or)h(e)o(xample,)f(if)h(the)g
+(DTD)h(declares)e(the)i(element)e(types)h Fq(a)p Fv(,)h
+Fq(b)p Fv(,)f(and)g Fq(c)p Fv(,)g(and)g(if)g(the)g(task)h(is)g(to)f
+(con)m(v)o(ert)e(an)j(arbitrary)396 1866 y(document)d(into)i(a)h
+(printable)e(format,)g(the)h(idea)g(is)h(to)f(de\002ne)g(for)g(e)n(v)o
+(ery)f(element)g(type)h(a)g(separate)g(class)h(that)g(has)f(a)396
+1974 y(method)f Fq(print)p Fv(.)h(The)g(classes)h(are)f
+Fq(eltype_a)p Fv(,)f Fq(eltype_b)p Fv(,)g(and)h Fq(eltype_c)p
+Fv(,)f(and)h(e)n(v)o(ery)f(class)i(implements)396 2082
+y Fq(print)f Fv(such)g(that)g(elements)g(of)g(the)g(type)g
+(corresponding)d(to)j(the)g(class)i(are)e(con)m(v)o(erted)d(to)k(the)f
+(output)f(format.)396 2232 y(The)h(parser)g(supports)f(such)h(a)g
+(design)g(directly)-5 b(.)19 b(As)i(it)g(is)g(impossible)e(to)i(deri)n
+(v)o(e)d(recursi)n(v)o(e)h(classes)i(in)g(O'Caml)3703
+2198 y Ff(3)3727 2232 y Fv(,)g(the)396 2340 y(specialized)f(element)f
+(classes)j(cannot)d(be)h(formed)f(by)g(simply)h(inheriting)f(from)g
+(the)h(b)n(uilt-in)g(classes)h(of)f(the)g(parser)396
+2447 y(and)g(adding)f(methods)g(for)g(customized)g(functionality)-5
+b(.)18 b(T)-7 b(o)20 b(get)g(around)f(this)h(limitation,)g(e)n(v)o(ery)
+f(node)g(of)h(the)396 2555 y(document)e(tree)j(is)g(represented)d(by)i
+Fr(two)h Fv(objects,)e(one)h(called)g("the)g(node")f(and)h(containing)e
+(the)i(recursi)n(v)o(e)396 2663 y(de\002nition)f(of)h(the)g(tree,)g
+(one)g(called)g("the)g(e)o(xtension".)e(Ev)o(ery)h(node)g(object)h(has)
+g(a)h(reference)e(to)h(the)g(e)o(xtension,)f(and)396
+2771 y(the)h(e)o(xtension)f(has)i(a)f(reference)f(to)h(the)g(node.)f
+(The)h(adv)n(antage)e(of)i(this)h(model)e(is)i(that)g(it)g(is)g(no)n(w)
+e(possible)h(to)396 2879 y(customize)g(the)g(e)o(xtension)f(without)g
+(af)n(fecting)g(the)h(typing)f(constraints)g(of)h(the)h(recursi)n(v)o
+(e)d(node)h(de\002nition.)396 3029 y(Ev)o(ery)g(e)o(xtension)g(must)h
+(ha)n(v)o(e)g(the)g(three)g(methods)f Fq(clone)p Fv(,)g
+Fq(node)p Fv(,)h(and)g Fq(set_node)p Fv(.)f(The)h(method)f
+Fq(clone)h Fv(creates)396 3137 y(a)h(deep)e(cop)o(y)h(of)g(the)g(e)o
+(xtension)f(object)g(and)h(returns)f(it;)i Fq(node)f
+Fv(returns)g(the)g(node)f(object)h(for)f(this)i(e)o(xtension)e(object;)
+396 3244 y(and)h Fq(set_node)f Fv(is)i(used)f(to)h(tell)g(the)f(e)o
+(xtension)f(object)g(which)h(node)f(is)i(associated)f(with)g(it,)h
+(this)g(method)e(is)396 3352 y(automatically)g(called)h(when)g(the)g
+(node)f(tree)h(is)h(initialized.)f(The)g(follo)n(wing)e(de\002nition)h
+(is)i(a)g(good)e(starting)h(point)396 3460 y(for)g(these)g(methods;)f
+(usually)h Fq(clone)g Fv(must)g(be)g(further)f(re\002ned)g(when)h
+(instance)g(v)n(ariables)f(are)h(added)f(to)h(the)h(class:)396
+3640 y Fq(class)44 b(custom_extension)e(=)486 3738 y(object)i(\(self\))
+576 3932 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(custom_extension)d
+(node)i(option\))576 4126 y(method)f(clone)h(=)h({<)g(>})576
+4223 y(method)e(node)i(=)665 4320 y(match)f(node)g(with)845
+4418 y(None)g(->)934 4515 y(assert)g(false)755 4612 y(|)h(Some)f(n)g
+(->)h(n)576 4709 y(method)e(set_node)h(n)h(=)665 4806
+y(node)f(<-)h(Some)f(n)p Black 3800 5278 a Fr(29)p Black
+eop
+%%Page: 30 30
+30 29 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 486 676 a Fq(end)396 867 y Fv(This)h(part)e(of)h(the)h(e)o
+(xtension)d(is)j(usually)f(the)g(same)h(for)e(all)i(classes,)g(so)g(it)
+g(is)g(a)f(good)f(idea)h(to)g(consider)396 975 y Fq(custom_extension)e
+Fv(as)j(the)f(super)n(-class)g(of)g(the)h(further)d(class)j
+(de\002nitions.)e(Continuining)f(the)j(e)o(xample)d(of)396
+1083 y(abo)o(v)o(e,)h(we)h(can)g(de\002ne)g(the)g(element)g(type)f
+(classes)j(as)e(follo)n(ws:)396 1263 y Fq(class)44 b(virtual)g
+(custom_extension)e(=)486 1360 y(object)i(\(self\))576
+1457 y(...)g(clone,)g(node,)g(set_node)f(defined)h(as)g(above)g(...)576
+1652 y(method)f(virtual)h(print)g(:)h(out_channel)e(->)h(unit)486
+1749 y(end)396 1943 y(class)g(eltype_a)g(=)486 2040 y(object)g
+(\(self\))576 2137 y(inherit)f(custom_extension)576 2234
+y(method)g(print)h(ch)h(=)g(...)486 2332 y(end)396 2526
+y(class)f(eltype_b)g(=)486 2623 y(object)g(\(self\))576
+2720 y(inherit)f(custom_extension)576 2817 y(method)g(print)h(ch)h(=)g
+(...)486 2914 y(end)396 3109 y(class)f(eltype_c)g(=)486
+3206 y(object)g(\(self\))576 3303 y(inherit)f(custom_extension)576
+3400 y(method)g(print)h(ch)h(=)g(...)486 3497 y(end)396
+3688 y Fv(The)20 b(method)f Fq(print)h Fv(can)g(no)n(w)f(be)i
+(implemented)d(for)h(e)n(v)o(ery)g(element)h(type)g(separately)-5
+b(.)18 b(Note)i(that)h(you)e(get)h(the)396 3796 y(associated)g(node)f
+(by)h(in)m(v)n(oking)396 3976 y Fq(self)44 b(#)h(node)396
+4167 y Fv(and)20 b(you)f(get)h(the)h(e)o(xtension)d(object)i(of)g(a)h
+(node)e Fq(n)h Fv(by)g(writing)396 4347 y Fq(n)45 b(#)g(extension)396
+4538 y Fv(It)21 b(is)g(guaranteed)d(that)396 4718 y Fq(self)44
+b(#)h(node)f(#)h(extension)e(==)i(self)p Black 3800 5278
+a Fr(30)p Black eop
+%%Page: 31 31
+31 30 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(al)o(w)o(ays)h(holds.)396 728 y(Here)f(are)g(sample)
+g(de\002nitions)g(of)g(the)g Fq(print)g Fv(methods:)396
+909 y Fq(class)44 b(eltype_a)g(=)486 1006 y(object)g(\(self\))576
+1103 y(inherit)f(custom_extension)576 1200 y(method)g(print)h(ch)h(=)
+665 1297 y(\(*)g(Nodes)f(... )f(are)h(only)g(containers:)f(*\))
+665 1394 y(output_string)g(ch)h("\(";)665 1491 y(List.iter)755
+1588 y(\(fun)g(n)h(->)f(n)h(#)g(extension)e(#)i(print)f(ch\))755
+1686 y(\(self)g(#)h(node)f(#)g(sub_nodes\);)665 1783
+y(output_string)f(ch)h("\)";)486 1880 y(end)396 2074
+y(class)g(eltype_b)g(=)486 2171 y(object)g(\(self\))576
+2268 y(inherit)f(custom_extension)576 2366 y(method)g(print)h(ch)h(=)
+665 2463 y(\(*)g(Print)f(the)g(value)g(of)h(the)f(CDATA)g(attribute)f
+("print":)h(*\))665 2560 y(match)g(self)g(#)h(node)f(#)h(attribute)e
+("print")h(with)755 2657 y(Value)g(s)314 b(->)44 b(output_string)f(ch)h
+(s)665 2754 y(|)h(Implied_value)e(->)h(output_string)f(ch)h
+("")665 2851 y(|)h(Valuelist)e(l)135 b(->)44
+b(assert)g(false)1517 2948 y(\(*)h(not)f(possible)f(because)h(the)g
+(att)h(is)f(CDATA)g(*\))486 3045 y(end)396 3240 y(class)g(eltype_c)g(=)
+486 3337 y(object)g(\(self\))576 3434 y(inherit)f(custom_extension)576
+3531 y(method)g(print)h(ch)h(=)665 3628 y(\(*)g(Print)f(the)g(contents)
+g(of)g(this)g(element:)g(*\))665 3725 y(output_string)f(ch)h(\(self)g
+(#)h(node)f(#)h(data\))486 3823 y(end)396 4017 y(class)f
+(null_extension)f(=)486 4114 y(object)h(\(self\))576
+4211 y(inherit)f(custom_extension)576 4308 y(method)g(print)h(ch)h(=)g
+(assert)e(false)486 4405 y(end)396 4638 y Fv(The)20 b(remaining)f(task)
+h(is)h(to)g(con\002gure)d(the)i(parser)g(such)g(that)g(these)g(e)o
+(xtension)f(classes)i(are)f(actually)g(used.)g(Here)396
+4746 y(another)f(problem)f(arises:)j(It)g(is)g(not)f(possible)g(to)g
+(dynamically)e(select)j(the)f(class)h(of)f(an)g(object)g(to)g(be)h
+(created.)e(As)396 4854 y(w)o(orkaround,)e(PXP)k(allo)n(ws)g(the)f
+(user)g(to)g(specify)g Fr(e)n(xemplar)g(objects)g Fv(for)f(the)h(v)n
+(arious)g(element)f(types;)h(instead)g(of)p Black 3800
+5278 a Fr(31)p Black eop
+%%Page: 32 32
+32 31 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(creating)f(the)i(nodes)e(of)h(the)g(tree)g(by)g
+(applying)f(the)h Fq(new)g Fv(operator)e(the)j(nodes)e(are)h(produced)e
+(by)i(duplicating)e(the)396 687 y(e)o(x)o(emplars.)h(As)h(object)g
+(duplication)f(preserv)o(es)g(the)h(class)h(of)f(the)g(object,)f(one)h
+(can)g(create)g(fresh)g(objects)g(of)g(e)n(v)o(ery)396
+795 y(class)h(for)f(which)g(pre)n(viously)e(an)i(e)o(x)o(emplar)e(has)j
+(been)e(re)o(gistered.)396 944 y(Ex)o(emplars)g(are)h(meant)g(as)h
+(objects)f(without)f(contents,)g(the)h(only)g(interesting)f(thing)g(is)
+j(that)e(e)o(x)o(emplars)e(are)396 1052 y(instances)i(of)g(a)h(certain)
+f(class.)g(The)g(creation)f(of)h(an)h(e)o(x)o(emplar)d(for)h(an)h
+(element)g(node)f(can)h(be)g(done)f(by:)396 1232 y Fq(let)45
+b(element_exemplar)d(=)i(new)h(element_impl)e(extension_exemplar)396
+1423 y Fv(And)20 b(a)h(data)f(node)f(e)o(x)o(emplar)f(is)j(created)f
+(by:)396 1603 y Fq(let)45 b(data_exemplar)d(=)j(new)f(data_impl)g
+(extension_exemplar)396 1794 y Fv(The)20 b(classes)h
+Fq(element_impl)e Fv(and)h Fq(data_impl)f Fv(are)h(de\002ned)f(in)i
+(the)f(module)f Fq(Pxp_document)p Fv(.)f(The)396 1902
+y(constructors)h(initialize)h(the)g(fresh)g(objects)g(as)h(empty)e
+(objects,)h(i.e.)g(without)g(children,)e(without)i(data)g(contents,)f
+(and)396 2010 y(so)i(on.)e(The)h Fq(extension_exemplar)e
+Fv(is)j(the)f(initial)h(e)o(xtension)e(object)g(the)h(e)o(x)o(emplars)f
+(are)h(associated)g(with.)396 2160 y(Once)g(the)g(e)o(x)o(emplars)f
+(are)h(created)f(and)h(stored)g(some)n(where)f(\(e.g.)g(in)h(a)h(hash)f
+(table\),)f(you)h(can)g(tak)o(e)g(an)g(e)o(x)o(emplar)396
+2268 y(and)g(create)g(a)g(concrete)f(instance)h(\(with)g(contents\))f
+(by)h(duplicating)e(it.)j(As)g(user)f(of)g(the)g(parser)g(you)f(are)h
+(normally)396 2376 y(not)g(concerned)e(with)i(this)h(as)g(this)g(is)g
+(part)f(of)g(the)g(internal)f(logic)h(of)g(the)g(parser)m(,)f(b)n(ut)h
+(as)h(background)c(kno)n(wledge)h(it)396 2483 y(is)j(w)o(orthwhile)e
+(to)i(mention)e(that)h(the)g(tw)o(o)h(methods)e Fq(create_element)f
+Fv(and)i Fq(create_data)f Fv(actually)g(perform)396 2591
+y(the)h(duplication)f(of)g(the)i(e)o(x)o(emplar)d(for)h(which)h(the)o
+(y)f(are)h(in)m(v)n(ok)o(ed,)e(additionally)g(apply)i(modi\002cations)e
+(to)j(the)f(clone,)396 2699 y(and)g(\002nally)g(return)f(the)h(ne)n(w)g
+(object.)f(Moreo)o(v)o(er)m(,)f(the)i(e)o(xtension)e(object)i(is)h
+(copied,)e(too,)h(and)f(the)i(ne)n(w)f(node)f(object)396
+2807 y(is)i(associated)f(with)g(the)g(fresh)g(e)o(xtension)e(object.)i
+(Note)g(that)g(this)g(is)h(the)f(reason)g(why)f(e)n(v)o(ery)g(e)o
+(xtension)f(object)i(must)396 2915 y(ha)n(v)o(e)g(a)g
+Fq(clone)g Fv(method.)396 3065 y(The)g(con\002guration)e(of)i(the)g
+(set)h(of)f(e)o(x)o(emplars)e(is)j(passed)f(to)h(the)f
+Fq(parse_document_entity)d Fv(function)i(as)i(third)396
+3173 y(ar)o(gument.)d(In)i(our)f(e)o(xample,)g(this)h(ar)o(gument)e
+(can)i(be)g(set)h(up)f(as)h(follo)n(ws:)396 3353 y Fq(let)45
+b(spec)f(=)486 3450 y(make_spec_from_alist)576 3547 y(~data_exemplar:)
+535 b(\(new)44 b(data_impl)g(\(new)g(null_extension\)\))576
+3644 y(~default_element_exemplar:)c(\(new)k(element_impl)f(\(new)h
+(null_extension\)\))576 3741 y(~element_alist:)710 3838
+y([)h("a",)89 b(new)44 b(element_impl)f(\(new)h(eltype_a\);)800
+3935 y("b",)89 b(new)44 b(element_impl)f(\(new)h(eltype_b\);)800
+4033 y("c",)89 b(new)44 b(element_impl)f(\(new)h(eltype_c\);)710
+4130 y(])576 4227 y(\(\))396 4418 y Fv(The)20 b Fq(~element_alist)f
+Fv(function)f(ar)o(gument)g(de\002nes)i(the)g(mapping)e(from)h(element)
+h(types)g(to)g(e)o(x)o(emplars)f(as)396 4526 y(associati)n(v)o(e)h
+(list.)h(The)f(ar)o(gument)e Fq(~data_exemplar)g Fv(speci\002es)j(the)f
+(e)o(x)o(emplar)e(for)i(data)g(nodes,)f(and)h(the)396
+4634 y Fq(~default_element_exemplar)d Fv(is)k(used)f(whene)n(v)o(er)e
+(the)i(parser)g(\002nds)g(an)g(element)g(type)f(for)h(which)g(the)396
+4742 y(associati)n(v)o(e)g(list)h(does)f(not)g(de\002ne)g(an)g(e)o(x)o
+(emplar)-5 b(.)p Black 3800 5278 a Fr(32)p Black eop
+%%Page: 33 33
+33 32 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(The)g(con\002guration)e(is)j(no)n(w)e(complete.)g(Y)
+-9 b(ou)20 b(can)g(still)h(use)g(the)f(same)g(parsing)f(functions,)g
+(only)g(the)h(initialization)g(is)396 687 y(a)h(bit)f(dif)n(ferent.)f
+(F)o(or)g(e)o(xample,)g(call)i(the)f(parser)f(by:)396
+867 y Fq(let)45 b(d)f(=)h(parse_document_entity)c(default_config)i
+(\(from_file)g("doc.xml"\))g(spec)396 1058 y Fv(Note)20
+b(that)h(the)f(resulting)f(document)f Fq(d)j Fv(has)f(a)h(usable)f
+(type;)g(especially)f(the)i Fq(print)f Fv(method)e(we)j(added)e(is)i
+(visible.)396 1166 y(So)g(you)e(can)h(print)g(your)e(document)h(by)396
+1346 y Fq(d)45 b(#)g(root)f(#)g(extension)g(#)g(print)g(stdout)396
+1578 y Fv(This)21 b(object-oriented)c(approach)h(looks)i(rather)f
+(complicated;)g(this)h(is)i(mostly)d(caused)h(by)g(w)o(orking)e(around)
+h(some)396 1686 y(problems)g(of)h(the)g(strict)h(typing)e(system)h(of)g
+(O'Caml.)g(Some)g(auxiliary)f(concepts)g(such)h(as)h(e)o(xtensions)e
+(were)396 1794 y(needed,)g(b)n(ut)h(the)g(practical)g(consequences)e
+(are)i(lo)n(w)-5 b(.)20 b(In)g(the)g(ne)o(xt)f(section,)h(one)g(of)g
+(the)g(e)o(xamples)f(of)h(the)396 1902 y(distrib)n(ution)f(is)i(e)o
+(xplained,)d(a)j(con)m(v)o(erter)d(from)h Fr(r)m(eadme)h
+Fv(documents)e(to)i(HTML.)-2 2321 y Fx(2.4.)39 b(Example:)f(An)h(HTML)f
+(bac)m(kend)g(f)m(or)h(the)g Fd(readme)44 b Fx(DTD)396
+2501 y Fv(The)20 b(con)m(v)o(erter)e(from)h Fr(r)m(eadme)h
+Fv(documents)e(to)i(HTML)g(documents)f(follo)n(ws)h(strictly)g(the)g
+(approach)e(to)j(de\002ne)e(one)396 2609 y(class)i(per)f(element)g
+(type.)f(The)h(HTML)g(code)g(is)h(similar)f(to)g(the)h
+Fr(r)m(eadme)e Fv(source,)g(because)h(of)g(this)h(most)f(elements)396
+2716 y(can)g(be)g(con)m(v)o(erted)e(in)i(the)g(follo)n(wing)f(w)o(ay:)h
+(Gi)n(v)o(en)g(the)g(input)f(element)396 2897 y Fq(content )396
+3088 y Fv(the)h(con)m(v)o(ersion)e(te)o(xt)i(is)h(the)f(concatenation)e
+(of)i(a)h(computed)d(pre\002x,)h(the)h(recursi)n(v)o(ely)f(con)m(v)o
+(erted)e(content,)i(and)h(a)396 3195 y(computed)e(suf)n(\002x.)396
+3345 y(Only)i(one)g(element)f(type)h(cannot)f(be)h(handled)f(by)h(this)
+g(scheme:)g Fq(footnote)p Fv(.)f(F)o(ootnotes)g(are)h(collected)g
+(while)g(the)o(y)396 3453 y(are)g(found)f(in)h(the)g(input)g(te)o(xt,)g
+(and)f(the)o(y)h(are)g(printed)f(after)h(the)g(main)g(te)o(xt)g(has)g
+(been)g(con)m(v)o(erted)d(and)j(printed.)-2 3781 y Fp(2.4.1.)35
+b(Header)396 4021 y Fq(open)44 b(Pxp_types)396 4118 y(open)g
+(Pxp_document)-2 4571 y Fp(2.4.2.)35 b(T)-7 b(ype)34
+b(dec)n(larations)396 4811 y Fq(class)44 b(type)g(footnote_printer)f(=)
+p Black 3800 5278 a Fr(33)p Black eop
+%%Page: 34 34
+34 33 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 486 579 a Fq(object)576 676 y(method)43 b(footnote_to_html)g(:)h
+(store_type)f(-)p Fo(>)i Fq(out_channel)e(-)p Fo(>)h
+Fq(unit)486 773 y(end)396 967 y(and)h(store_type)e(=)486
+1065 y(object)576 1162 y(method)g(alloc_footnote)g(:)i
+(footnote_printer)d(-)p Fo(>)i Fq(int)576 1259 y(method)f
+(print_footnotes)g(:)h(out_channel)f(-)p Fo(>)i Fq(unit)486
+1356 y(end)396 1453 y(;;)-2 1906 y Fp(2.4.3.)35 b(Class)g
+Fc(store)396 2073 y Fv(The)20 b Fq(store)g Fv(is)h(a)g(container)d(for)
+i(footnotes.)f(Y)-9 b(ou)19 b(can)h(add)g(a)g(footnote)f(by)h(in)m(v)n
+(oking)e Fq(alloc_footnote)p Fv(;)g(the)396 2181 y(ar)o(gument)g(is)j
+(an)f(object)g(of)g(the)g(class)h Fq(footnote_printer)p
+Fv(,)d(the)i(method)f(returns)g(the)i(number)d(of)i(the)g(footnote.)396
+2289 y(The)g(interesting)f(property)f(of)i(a)h(footnote)d(is)k(that)e
+(it)h(can)f(be)g(con)m(v)o(erted)d(to)k(HTML,)e(so)i(a)g
+Fq(footnote_printer)d Fv(is)396 2397 y(an)i(object)g(with)g(a)h(method)
+e Fq(footnote_to_html)p Fv(.)f(The)i(class)h Fq(footnote)e
+Fv(which)h(is)h(de\002ned)e(belo)n(w)h(has)g(a)396 2505
+y(compatible)f(method)g Fq(footnote_to_html)f Fv(such)i(that)g(objects)
+g(created)f(from)h(it)h(can)f(be)g(used)g(as)396 2613
+y Fq(footnote_printer)p Fv(s.)396 2763 y(The)g(other)f(method,)g
+Fq(print_footnotes)f Fv(prints)i(the)g(footnotes)f(as)i(de\002nition)e
+(list,)i(and)f(is)h(typically)e(in)m(v)n(ok)o(ed)396
+2870 y(after)h(the)g(main)g(material)g(of)g(the)g(page)g(has)g(already)
+f(been)h(printed.)e(Ev)o(ery)h(item)h(of)g(the)h(list)g(is)g(printed)e
+(by)396 2978 y Fq(footnote_to_html)p Fv(.)396 3200 y
+Fq(class)44 b(store)g(=)486 3297 y(object)g(\(self\))576
+3491 y(val)g(mutable)g(footnotes)f(=)i(\()f([])h(:)f(\(int)h(*)f
+(footnote_printer\))e(list)i(\))576 3589 y(val)g(mutable)g
+(next_footnote_number)d(=)k(1)576 3783 y(method)e(alloc_footnote)g(n)i
+(=)665 3880 y(let)g(number)e(=)i(next_footnote_number)d(in)665
+3977 y(next_footnote_number)g Fo(<)p Fq(-)i(number+1;)665
+4074 y(footnotes)g Fo(<)p Fq(-)g(footnotes)f(@)i([)g(number,)e(n)i(];)
+665 4171 y(number)576 4366 y(method)e(print_footnotes)g(ch)h(=)665
+4463 y(if)h(footnotes)e Fo(<>)h Fq([])h(then)f(begin)396
+4560 y(output_string)f(ch)h(")p Fo(<)p Fq(hr)g(align=left)g
+(noshade=noshade)e(width=\\"30\045\\")p Fo(>)p Fq(\\n";)396
+4657 y(output_string)h(ch)h(")p Fo(<)p Fq(dl)p Fo(>)p
+Fq(\\n";)396 4754 y(List.iter)486 4851 y(\(fun)g(\(_,n\))g(-)p
+Fo(>)p Black 3800 5278 a Fr(34)p Black eop
+%%Page: 35 35
+35 34 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 620 579 a Fq(n)45 b(#)g(footnote_to_html)d(\(self)i(:)h
+(#store_type)e(:)p Fo(>)h Fq(store_type\))f(ch\))486
+676 y(footnotes;)396 773 y(output_string)g(ch)h(")p Fo(<)p
+Fq(/dl)p Fo(>)p Fq(\\n";)665 870 y(end)486 1065 y(end)396
+1162 y(;;)-2 1614 y Fp(2.4.4.)35 b(Function)f Fc(escape_html)396
+1782 y Fv(This)21 b(function)d(con)m(v)o(erts)h(the)h(characters)f
+Fm(<)p Fv(,)h Fm(>)p Fv(,)g(&,)g(and)g(")h(to)f(their)g(HTML)g
+(representation.)e(F)o(or)h(e)o(xample,)396 1890 y Fq(escape_html)43
+b(")p Fo(<>)p Fq(")h(=)h("<>")p Fv(.)19 b(Other)g(characters)h
+(are)g(left)g(unchanged.)396 2070 y Fq(let)45 b(escape_html)e(s)h(=)486
+2167 y(Str.global_substitute)576 2264 y(\(Str.regexp)f(")p
+Fo(<)p Fq(\\\\|)p Fo(>)p Fq(\\\\|&\\\\|\\""\))576 2362
+y(\(fun)h(s)g(-)p Fo(>)665 2459 y Fq(match)g(Str.matched_string)e(s)j
+(with)755 2556 y(")p Fo(<)p Fq(")f(-)p Fo(>)h Fq("<")665
+2653 y(|)g(")p Fo(>)p Fq(")f(-)p Fo(>)h Fq(">")665
+2750 y(|)g("&")f(-)p Fo(>)h Fq("&")665 2847 y(|)g("\\"")f(-)p
+Fo(>)g Fq(""")665 2944 y(|)h(_)g(-)p Fo(>)f Fq(assert)g(false\))
+576 3042 y(s)396 3139 y(;;)-2 3591 y Fp(2.4.5.)35 b(Vir)r(tual)f(c)n
+(lass)h Fc(shared)396 3759 y Fv(This)21 b(virtual)e(class)i(is)g(the)g
+(abstract)f(superclass)g(of)f(the)i(e)o(xtension)d(classes)k(sho)n(wn)d
+(belo)n(w)-5 b(.)19 b(It)i(de\002nes)f(the)g(standard)396
+3867 y(methods)f Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)g
+Fq(set_node)p Fv(,)f(and)g(declares)h(the)g(type)g(of)g(the)g(virtual)g
+(method)e Fq(to_html)p Fv(.)i(This)396 3975 y(method)f(recursi)n(v)o
+(ely)f(tra)n(v)o(erses)i(the)g(whole)g(element)g(tree,)g(and)f(prints)h
+(the)g(con)m(v)o(erted)e(HTML)i(code)f(to)i(the)f(output)396
+4083 y(channel)f(passed)h(as)h(second)f(ar)o(gument.)d(The)j(\002rst)h
+(ar)o(gument)d(is)j(the)f(reference)f(to)h(the)g(global)f
+Fq(store)h Fv(object)g(which)396 4191 y(collects)h(the)f(footnotes.)396
+4371 y Fq(class)44 b(virtual)g(shared)g(=)486 4468 y(object)g(\(self\))
+576 4662 y(\(*)g(--)h(default_ext)e(--)h(*\))576 4857
+y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(shared)f(node)g(option\))p
+Black 3800 5278 a Fr(35)p Black eop
+%%Page: 36 36
+36 35 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 576 676 a Fq(method)43 b(clone)h(=)h({)p Fo(<)f(>)p
+Fq(})576 773 y(method)f(node)i(=)665 870 y(match)f(node)g(with)845
+967 y(None)g(-)p Fo(>)934 1065 y Fq(assert)g(false)755
+1162 y(|)h(Some)f(n)g(-)p Fo(>)h Fq(n)576 1259 y(method)e(set_node)h(n)
+h(=)665 1356 y(node)f Fo(<)p Fq(-)h(Some)f(n)576 1550
+y(\(*)g(--)h(virtual)e(--)i(*\))576 1745 y(method)e(virtual)h(to_html)g
+(:)g(store)g(-)p Fo(>)h Fq(out_channel)e(-)p Fo(>)h Fq(unit)486
+1939 y(end)396 2036 y(;;)-2 2489 y Fp(2.4.6.)35 b(Class)g
+Fc(only_data)396 2656 y Fv(This)21 b(class)g(de\002nes)f
+Fq(to_html)f Fv(such)h(that)h(the)f(character)f(data)h(of)g(the)g
+(current)f(node)g(is)i(con)m(v)o(erted)d(to)i(HTML.)g(Note)396
+2764 y(that)h Fq(self)f Fv(is)h(an)f(e)o(xtension)f(object,)g
+Fq(self)44 b(#)h(node)20 b Fv(is)h(the)f(node)f(object,)h(and)f
+Fq(self)45 b(#)f(node)g(#)h(data)20 b Fv(returns)396
+2872 y(the)g(character)f(data)h(of)g(the)h(node.)396
+3052 y Fq(class)44 b(only_data)g(=)486 3149 y(object)g(\(self\))576
+3247 y(inherit)f(shared)576 3441 y(method)g(to_html)h(store)g(ch)h(=)
+665 3538 y(output_string)e(ch)h(\(escape_html)f(\(self)h(#)h(node)f(#)h
+(data\)\))486 3635 y(end)396 3732 y(;;)-2 4185 y Fp(2.4.7.)35
+b(Class)g Fc(readme)396 4353 y Fv(This)21 b(class)g(con)m(v)o(erts)d
+(elements)i(of)g(type)g Fq(readme)g Fv(to)g(HTML.)g(Such)f(an)h
+(element)g(is)h(\(by)f(de\002nition\))e(al)o(w)o(ays)j(the)396
+4461 y(root)f(element)f(of)h(the)g(document.)e(First,)j(the)f(HTML)g
+(header)f(is)j(printed;)d(the)h Fq(title)g Fv(attrib)n(ute)f(of)h(the)h
+(element)396 4568 y(determines)e(the)i(title)f(of)g(the)h(HTML)f(page.)
+f(Some)h(aspects)g(of)g(the)g(HTML)g(page)g(can)g(be)g(con\002gured)e
+(by)h(setting)396 4676 y(certain)h(parameter)f(entities,)h(for)g(e)o
+(xample)e(the)i(background)d(color)m(,)i(the)h(te)o(xt)g(color)m(,)f
+(and)h(link)g(colors.)f(After)h(the)396 4784 y(header)m(,)f(the)h
+Fq(body)g Fv(tag,)g(and)g(the)g(headline)f(ha)n(v)o(e)g(been)h
+(printed,)f(the)h(contents)f(of)h(the)g(page)g(are)g(con)m(v)o(erted)e
+(by)p Black 3798 5278 a Fr(36)p Black eop
+%%Page: 37 37
+37 36 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(in)m(v)n(oking)e Fq(to_html)i Fv(on)g(all)g
+(children)f(of)h(the)g(current)f(node)g(\(which)h(is)h(the)f(root)f
+(node\).)g(Then,)g(the)h(footnotes)f(are)396 687 y(appended)f(to)j
+(this)f(by)g(telling)g(the)g(global)f Fq(store)h Fv(object)g(to)g
+(print)g(the)g(footnotes.)f(Finally)-5 b(,)19 b(the)h(end)g(tags)g(of)g
+(the)396 795 y(HTML)g(pages)g(are)g(printed.)396 944
+y(This)h(class)g(is)g(an)f(e)o(xample)f(ho)n(w)g(to)i(access)g(the)f(v)
+n(alue)f(of)h(an)g(attrib)n(ute:)g(The)g(v)n(alue)g(is)h(determined)d
+(by)i(in)m(v)n(oking)396 1052 y Fq(self)44 b(#)h(node)f(#)h(attribute)e
+("title")p Fv(.)20 b(As)h(this)f(attrib)n(ute)g(has)g(been)g(declared)f
+(as)i(CD)m(A)-9 b(T)h(A)20 b(and)g(as)h(being)396 1160
+y(required,)d(the)j(v)n(alue)e(has)i(al)o(w)o(ays)f(the)g(form)g
+Fq(Value)44 b(s)20 b Fv(where)g Fq(s)g Fv(is)h(the)g(string)e(v)n(alue)
+h(of)g(the)g(attrib)n(ute.)396 1310 y(Y)-9 b(ou)20 b(can)g(also)g(see)h
+(ho)n(w)f(entity)g(contents)f(can)h(be)g(accessed.)g(A)h(parameter)e
+(entity)g(object)h(can)g(be)g(look)o(ed)f(up)h(by)396
+1417 y Fq(self)44 b(#)h(node)f(#)h(dtd)f(#)h(par_entity)e("name")p
+Fv(,)20 b(and)f(by)h(in)m(v)n(oking)e Fq(replacement_text)g
+Fv(the)i(v)n(alue)g(of)396 1525 y(the)g(entity)g(is)h(returned)e(after)
+h(inner)f(parameter)g(and)g(character)g(entities)i(ha)n(v)o(e)f(been)f
+(processed.)g(Note)h(that)g(you)396 1633 y(must)g(use)h
+Fq(gen_entity)e Fv(instead)h(of)g Fq(par_entity)f Fv(to)h(access)h
+(general)e(entities.)396 1855 y Fq(class)44 b(readme)g(=)486
+1952 y(object)g(\(self\))576 2049 y(inherit)f(shared)576
+2244 y(method)g(to_html)h(store)g(ch)h(=)665 2341 y(\(*)g(output)f
+(header)f(*\))665 2438 y(output_string)396 2535 y(ch)i(")p
+Fo(<)p Fq(!DOCTYPE)e(HTML)h(PUBLIC)g(\\"-//W3C//DTD)e(HTML)j(3.2)f
+(Final//EN\\")p Fo(>)p Fq(";)665 2632 y(output_string)396
+2729 y(ch)h(")p Fo(<)p Fq(!-)f(WARNING!)f(This)h(is)h(a)g(generated)e
+(file,)h(do)g(not)h(edit!)f(-)p Fo(>)p Fq(\\n";)665 2826
+y(let)h(title)f(=)396 2923 y(match)g(self)g(#)h(node)f(#)h(attribute)e
+("title")h(with)576 3021 y(Value)g(s)g(-)p Fo(>)h Fq(s)486
+3118 y(|)g(_)f(-)p Fo(>)h Fq(assert)e(false)665 3215
+y(in)665 3312 y(let)i(html_header,)d(_)j(=)396 3409 y(try)g(\(self)f(#)
+g(node)g(#)h(dtd)f(#)h(par_entity)e("readme:html:header"\))934
+3506 y(#)i(replacement_text)396 3603 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("",)g(false)g(in)665 3701 y(let)h(html_trailer,)d(_)j(=)396
+3798 y(try)g(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:trailer"\))934 3895 y(#)i(replacement_text)396
+3992 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+4089 y(let)h(html_bgcolor,)d(_)j(=)396 4186 y(try)g(\(self)f(#)g(node)g
+(#)h(dtd)f(#)h(par_entity)e("readme:html:bgcolor"\))934
+4283 y(#)i(replacement_text)396 4380 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("white",)f(false)h(in)665 4478 y(let)h(html_textcolor,)d(_)j
+(=)396 4575 y(try)g(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:textcolor"\))934 4672 y(#)i(replacement_text)396
+4769 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+4866 y(let)h(html_alinkcolor,)d(_)i(=)p Black 3797 5278
+a Fr(37)p Black eop
+%%Page: 38 38
+38 37 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fq(try)45 b(\(self)f(#)g(node)g(#)h(dtd)f(#)h
+(par_entity)e("readme:html:alinkcolor"\))934 676 y(#)i
+(replacement_text)396 773 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("",)g(false)g(in)665 870 y(let)h(html_vlinkcolor,)d(_)i(=)396
+967 y(try)h(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:vlinkcolor"\))934 1065 y(#)i(replacement_text)396
+1162 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+1259 y(let)h(html_linkcolor,)d(_)j(=)396 1356 y(try)g(\(self)f(#)g
+(node)g(#)h(dtd)f(#)h(par_entity)e("readme:html:linkcolor"\))934
+1453 y(#)i(replacement_text)396 1550 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("",)g(false)g(in)665 1647 y(let)h(html_background,)d(_)i(=)
+396 1745 y(try)h(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:background"\))934 1842 y(#)i(replacement_text)396
+1939 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+2133 y(output_string)f(ch)h(")p Fo(<)p Fq(html)p Fo(><)p
+Fq(header)p Fo(><)p Fq(title)p Fo(>)p Fq(\\n";)665 2230
+y(output_string)f(ch)h(\(escape_html)f(title\);)665 2327
+y(output_string)g(ch)h(")p Fo(<)p Fq(/title)p Fo(><)p
+Fq(/header)p Fo(>)p Fq(\\n";)665 2424 y(output_string)f(ch)h(")p
+Fo(<)p Fq(body)g(";)665 2522 y(List.iter)396 2619 y(\(fun)g
+(\(name,value\))f(-)p Fo(>)531 2716 y Fq(if)h(value)g
+Fo(<>)h Fq("")f(then)620 2813 y(output_string)f(ch)i(\(name)f(^)g
+("=\\"")g(^)h(escape_html)e(value)h(^)h("\\")f("\)\))396
+2910 y([)h("bgcolor",)178 b(html_bgcolor;)486 3007 y("text",)313
+b(html_textcolor;)486 3104 y("link",)g(html_linkcolor;)486
+3202 y("alink",)268 b(html_alinkcolor;)486 3299 y("vlink",)g
+(html_vlinkcolor;)396 3396 y(];)665 3493 y(output_string)43
+b(ch)h(")p Fo(>)p Fq(\\n";)665 3590 y(output_string)f(ch)h
+(html_header;)665 3687 y(output_string)f(ch)h(")p Fo(<)p
+Fq(h1)p Fo(>)p Fq(";)665 3784 y(output_string)f(ch)h(\(escape_html)f
+(title\);)665 3882 y(output_string)g(ch)h(")p Fo(<)p
+Fq(/h1)p Fo(>)p Fq(\\n";)665 3979 y(\(*)h(process)e(main)i(content:)e
+(*\))665 4076 y(List.iter)396 4173 y(\(fun)h(n)h(-)p
+Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396
+4270 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 4367
+y(\(*)g(now)f(process)g(footnotes)f(*\))665 4464 y(store)h(#)h
+(print_footnotes)d(ch;)665 4561 y(\(*)j(trailer)e(*\))665
+4659 y(output_string)g(ch)h(html_trailer;)665 4756 y(output_string)f
+(ch)h(")p Fo(<)p Fq(/html)p Fo(>)p Fq(\\n";)p Black 3800
+5278 a Fr(38)p Black eop
+%%Page: 39 39
+39 38 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 486 579 a Fq(end)396 676 y(;;)-2 1129 y Fp(2.4.8.)35
+b(Classes)h Fc(section)p Fp(,)31 b Fc(sect1)p Fp(,)g
+Fc(sect2)p Fp(,)g(and)j Fc(sect3)396 1296 y Fv(As)21
+b(the)f(con)m(v)o(ersion)e(process)i(is)h(v)o(ery)e(similar)m(,)h(the)g
+(con)m(v)o(ersion)d(classes)22 b(of)e(the)g(three)g(section)f(le)n(v)o
+(els)i(are)f(deri)n(v)o(ed)396 1404 y(from)f(the)i(more)e(general)g
+Fq(section)h Fv(class.)h(The)e(HTML)h(code)g(of)g(the)g(section)g(le)n
+(v)o(els)g(only)f(dif)n(fers)h(in)g(the)g(type)g(of)396
+1512 y(the)g(headline,)f(and)h(because)f(of)h(this)h(the)f(classes)i
+(describing)c(the)i(section)g(le)n(v)o(els)g(can)g(be)h(computed)d(by)i
+(replacing)396 1620 y(the)g(class)i(ar)o(gument)17 b
+Fq(the_tag)j Fv(of)g Fq(section)g Fv(by)f(the)i(HTML)e(name)h(of)g(the)
+g(headline)f(tag.)396 1770 y(Section)h(elements)g(are)g(con)m(v)o
+(erted)e(to)i(HTML)g(by)g(printing)e(a)j(headline)e(and)h(then)f(con)m
+(v)o(erting)f(the)i(contents)f(of)h(the)396 1878 y(element)g(recursi)n
+(v)o(ely)-5 b(.)18 b(More)h(precisely)-5 b(,)19 b(the)h(\002rst)h
+(sub-element)e(is)i(al)o(w)o(ays)f(a)h Fq(title)f Fv(element,)f(and)h
+(the)g(other)396 1985 y(elements)g(are)g(the)g(contents)g(of)g(the)g
+(section.)g(This)g(structure)f(is)j(declared)c(in)j(the)f(DTD,)g(and)g
+(it)h(is)g(guaranteed)d(that)396 2093 y(the)i(document)f(matches)g(the)
+i(DTD.)f(Because)g(of)g(this)h(the)f(title)h(node)e(can)h(be)g
+(separated)f(from)g(the)h(rest)h(without)f(an)o(y)396
+2201 y(checks.)396 2351 y(Both)g(the)h(title)g(node,)e(and)g(the)h
+(body)f(nodes)h(are)g(then)f(con)m(v)o(erted)f(to)i(HTML)g(by)g
+(calling)g Fq(to_html)f Fv(on)h(them.)396 2572 y Fq(class)44
+b(section)g(the_tag)g(=)486 2670 y(object)g(\(self\))576
+2767 y(inherit)f(shared)576 2961 y(val)h(tag)g(=)h(the_tag)576
+3155 y(method)e(to_html)h(store)g(ch)h(=)665 3252 y(let)g(sub_nodes)e
+(=)i(self)f(#)g(node)h(#)f(sub_nodes)g(in)665 3350 y(match)g(sub_nodes)
+g(with)486 3447 y(title_node)f(::)i(rest)f(-)p Fo(>)576
+3544 y Fq(output_string)e(ch)j(\(")p Fo(<)p Fq(")f(^)g(tag)h(^)f(")p
+Fo(>)p Fq(\\n"\);)576 3641 y(title_node)f(#)h(extension)g(#)g(to_html)g
+(store)g(ch;)576 3738 y(output_string)e(ch)j(\("\\n)p
+Fo(<)p Fq(/")e(^)i(tag)f(^)h(")p Fo(>)p Fq("\);)576 3835
+y(List.iter)665 3932 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#)
+i(to_html)e(store)h(ch\))665 4029 y(rest)396 4127 y(|)h(_)g(-)p
+Fo(>)576 4224 y Fq(assert)e(false)486 4321 y(end)396
+4418 y(;;)396 4612 y(class)h(sect1)g(=)h(section)f("h1";;)396
+4709 y(class)g(sect2)g(=)h(section)f("h3";;)396 4807
+y(class)g(sect3)g(=)h(section)f("h4";;)p Black 3800 5278
+a Fr(39)p Black eop
+%%Page: 40 40
+40 39 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black -2 583 a Fp(2.4.9.)35 b(Classes)h Fc(map_tag)p
+Fp(,)31 b Fc(p)p Fp(,)i Fc(em)p Fp(,)f Fc(ul)p Fp(,)h
+Fc(li)396 751 y Fv(Se)n(v)o(eral)20 b(element)f(types)h(are)g(con)m(v)o
+(erted)e(to)i(HTML)g(by)g(simply)g(mapping)e(them)i(to)g(corresponding)
+d(HTML)396 859 y(element)j(types.)g(The)f(class)j Fq(map_tag)d
+Fv(implements)g(this,)i(and)f(the)g(class)h(ar)o(gument)d
+Fq(the_target_tag)396 967 y Fv(determines)h(the)i(tag)f(name)f(to)i
+(map)e(to.)h(The)g(output)f(consists)i(of)f(the)g(start)h(tag,)f(the)g
+(recursi)n(v)o(ely)e(con)m(v)o(erted)g(inner)396 1075
+y(elements,)i(and)g(the)g(end)f(tag.)396 1255 y Fq(class)44
+b(map_tag)g(the_target_tag)e(=)486 1352 y(object)i(\(self\))576
+1449 y(inherit)f(shared)576 1643 y(val)h(target_tag)f(=)i
+(the_target_tag)576 1838 y(method)e(to_html)h(store)g(ch)h(=)665
+1935 y(output_string)e(ch)h(\(")p Fo(<)p Fq(")g(^)h(target_tag)e(^)i(")
+p Fo(>)p Fq(\\n"\);)665 2032 y(List.iter)396 2129 y(\(fun)f(n)h(-)p
+Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396
+2226 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 2323
+y(output_string)e(ch)h(\("\\n)p Fo(<)p Fq(/")g(^)h(target_tag)e(^)h(")p
+Fo(>)p Fq("\);)486 2420 y(end)396 2518 y(;;)396 2712
+y(class)g(p)h(=)g(map_tag)e("p";;)396 2809 y(class)h(em)h(=)f(map_tag)g
+("b";;)396 2906 y(class)g(ul)h(=)f(map_tag)g("ul";;)396
+3003 y(class)g(li)h(=)f(map_tag)g("li";;)-2 3456 y Fp(2.4.10.)36
+b(Class)f Fc(br)396 3624 y Fv(Element)20 b(of)g(type)f
+Fq(br)i Fv(are)f(mapped)f(to)h(the)g(same)g(HTML)g(type.)g(Note)g(that)
+g(HTML)g(forbids)f(the)h(end)g(tag)g(of)g Fq(br)p Fv(.)396
+3804 y Fq(class)44 b(br)h(=)486 3901 y(object)f(\(self\))576
+3998 y(inherit)f(shared)576 4192 y(method)g(to_html)h(store)g(ch)h(=)
+665 4289 y(output_string)e(ch)h(")p Fo(<)p Fq(br)p Fo(>)p
+Fq(\\n";)665 4387 y(List.iter)396 4484 y(\(fun)g(n)h(-)p
+Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396
+4581 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)486 4678
+y(end)396 4775 y(;;)p Black 3800 5278 a Fr(40)p Black
+eop
+%%Page: 41 41
+41 40 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black -2 583 a Fp(2.4.11.)36 b(Class)f Fc(code)396 751
+y Fv(The)20 b Fq(code)g Fv(type)g(is)h(con)m(v)o(erted)d(to)i(a)h
+Fq(pre)f Fv(section)g(\(preformatted)d(te)o(xt\).)i(As)i(the)g(meaning)
+d(of)i(tabs)h(is)g(unspeci\002ed)e(in)396 859 y(HTML,)h(tabs)g(are)h(e)
+o(xpanded)c(to)k(spaces.)396 1039 y Fq(class)44 b(code)g(=)486
+1136 y(object)g(\(self\))576 1233 y(inherit)f(shared)576
+1427 y(method)g(to_html)h(store)g(ch)h(=)665 1525 y(let)g(data)f(=)g
+(self)h(#)f(node)g(#)h(data)f(in)665 1622 y(\(*)h(convert)e(tabs)i(*\))
+665 1719 y(let)g(l)f(=)h(String.length)e(data)h(in)665
+1816 y(let)h(rec)f(preprocess)f(i)i(column)f(=)396 1913
+y(\(*)h(this)f(is)g(very)h(ineffective)e(but)h(comprehensive:)e(*\))396
+2010 y(if)j(i)f Fo(<)h Fq(l)g(then)486 2107 y(match)f(data.[i])f(with)
+665 2205 y('\\t')h(-)p Fo(>)396 2302 y Fq(let)h(n)f(=)h(8)g(-)f
+(\(column)g(mod)g(8\))h(in)396 2399 y(String.make)e(n)i(')g(')f(^)h
+(preprocess)e(\(i+1\))h(\(column)g(+)g(n\))576 2496 y(|)g('\\n')g(-)p
+Fo(>)396 2593 y Fq("\\n")g(^)h(preprocess)e(\(i+1\))h(0)576
+2690 y(|)g(c)h(-)p Fo(>)396 2787 y Fq(String.make)e(1)i(c)g(^)f
+(preprocess)f(\(i+1\))h(\(column)g(+)h(1\))396 2884 y(else)486
+2982 y("")665 3079 y(in)665 3176 y(output_string)e(ch)h(")p
+Fo(<)p Fq(p)p Fo(><)p Fq(pre)p Fo(>)p Fq(";)665 3273
+y(output_string)f(ch)h(\(escape_html)f(\(preprocess)g(0)i(0\)\);)665
+3370 y(output_string)e(ch)h(")p Fo(<)p Fq(/pre)p Fo(><)p
+Fq(/p)p Fo(>)p Fq(";)486 3564 y(end)396 3662 y(;;)-2
+4114 y Fp(2.4.12.)36 b(Class)f Fc(a)396 4282 y Fv(Hyperlinks,)19
+b(e)o(xpressed)g(by)g(the)i Fq(a)f Fv(element)g(type,)f(are)h(con)m(v)o
+(erted)e(to)i(the)g(HTML)g Fq(a)h Fv(type.)e(If)i(the)f(tar)o(get)f(of)
+h(the)396 4390 y(hyperlink)e(is)j(gi)n(v)o(en)d(by)i
+Fq(href)p Fv(,)g(the)g(URL)g(of)g(this)g(attrib)n(ute)g(can)g(be)g
+(used)g(directly)-5 b(.)18 b(Alternati)n(v)o(ely)-5 b(,)18
+b(the)i(tar)o(get)f(can)h(be)396 4498 y(gi)n(v)o(en)f(by)h
+Fq(readmeref)f Fv(in)i(which)e(case)i(the)f(".html")g(suf)n(\002x)f
+(must)i(be)f(added)f(to)h(the)g(\002le)h(name.)396 4647
+y(Note)f(that)h(within)f Fq(a)g Fv(only)g(#PCD)m(A)-9
+b(T)h(A)20 b(is)h(allo)n(wed,)e(so)i(the)f(contents)f(can)h(be)g(con)m
+(v)o(erted)e(directly)h(by)h(applying)396 4755 y Fq(escape_html)f
+Fv(to)i(the)f(character)f(data)h(contents.)p Black 3800
+5278 a Fr(41)p Black eop
+%%Page: 42 42
+42 41 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fq(class)44 b(a)h(=)486 676 y(object)f(\(self\))576
+773 y(inherit)f(shared)576 967 y(method)g(to_html)h(store)g(ch)h(=)665
+1065 y(output_string)e(ch)h(")p Fo(<)p Fq(a)h(";)665
+1162 y(let)g(href)f(=)396 1259 y(match)g(self)g(#)h(node)f(#)h
+(attribute)e("href")h(with)576 1356 y(Value)g(v)g(-)p
+Fo(>)h Fq(escape_html)e(v)486 1453 y(|)i(Valuelist)e(_)i(-)p
+Fo(>)f Fq(assert)g(false)486 1550 y(|)h(Implied_value)d(-)p
+Fo(>)665 1647 y Fq(begin)i(match)g(self)g(#)h(node)f(#)h(attribute)e
+("readmeref")g(with)486 1745 y(Value)h(v)h(-)p Fo(>)f
+Fq(escape_html)f(v)i(^)f(".html")396 1842 y(|)h(Valuelist)e(_)i(-)p
+Fo(>)f Fq(assert)g(false)396 1939 y(|)h(Implied_value)e(-)p
+Fo(>)576 2036 y Fq("")665 2133 y(end)665 2230 y(in)665
+2327 y(if)i(href)f Fo(<>)g Fq("")h(then)396 2424 y(output_string)e(ch)h
+(\("href=\\"")88 b(^)45 b(href)f(^)h("\\""\);)665 2522
+y(output_string)e(ch)h(")p Fo(>)p Fq(";)665 2619 y(output_string)f(ch)h
+(\(escape_html)f(\(self)h(#)h(node)f(#)h(data\)\);)665
+2716 y(output_string)e(ch)h(")p Fo(<)p Fq(/a)p Fo(>)p
+Fq(";)486 2910 y(end)396 3007 y(;;)-2 3460 y Fp(2.4.13.)36
+b(Class)f Fc(footnote)396 3628 y Fv(The)20 b Fq(footnote)g
+Fv(class)h(has)f(tw)o(o)h(methods:)e Fq(to_html)g Fv(to)i(con)m(v)o
+(ert)d(the)i(footnote)f(reference)f(to)i(HTML,)g(and)396
+3736 y Fq(footnote_to_html)e Fv(to)j(con)m(v)o(ert)d(the)i(footnote)f
+(te)o(xt)h(itself.)396 3885 y(The)g(footnote)f(reference)f(is)j(con)m
+(v)o(erted)d(to)i(a)h(local)f(hyperlink;)e(more)h(precisely)-5
+b(,)19 b(to)h(tw)o(o)h(anchor)d(tags)j(which)e(are)396
+3993 y(connected)g(with)h(each)g(other)-5 b(.)19 b(The)h(te)o(xt)g
+(anchor)f(points)h(to)g(the)g(footnote)f(anchor)m(,)f(and)h(the)i
+(footnote)d(anchor)h(points)396 4101 y(to)i(the)f(te)o(xt)g(anchor)-5
+b(.)396 4250 y(The)20 b(footnote)f(must)h(be)g(allocated)f(in)i(the)f
+Fq(store)g Fv(object.)f(By)i(allocating)e(the)h(footnote,)f(you)g(get)h
+(the)g(number)f(of)396 4358 y(the)h(footnote,)f(and)g(the)i(te)o(xt)f
+(of)f(the)i(footnote)d(is)j(stored)f(until)g(the)g(end)g(of)g(the)g
+(HTML)g(page)f(is)j(reached)c(when)i(the)396 4466 y(footnotes)f(can)h
+(be)g(printed.)f(The)h Fq(to_html)f Fv(method)g(stores)i(simply)e(the)i
+(object)e(itself,)i(such)f(that)g(the)396 4574 y Fq(footnote_to_html)e
+Fv(method)h(is)i(in)m(v)n(ok)o(ed)e(on)g(the)i(same)f(object)g(that)g
+(encountered)d(the)k(footnote.)p Black 3800 5278 a Fr(42)p
+Black eop
+%%Page: 43 43
+43 42 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(The)g Fq(to_html)g Fv(only)f(allocates)h(the)h
+(footnote,)d(and)h(prints)h(the)g(reference)f(anchor)m(,)f(b)n(ut)i(it)
+h(does)f(not)g(print)g(nor)396 687 y(con)m(v)o(ert)e(the)j(contents)e
+(of)h(the)g(note.)g(This)g(is)h(deferred)d(until)j(the)f(footnotes)e
+(actually)i(get)g(printed,)f(i.e.)h(the)g(recursi)n(v)o(e)396
+795 y(call)h(of)f Fq(to_html)f Fv(on)h(the)g(sub)g(nodes)g(is)h(done)e
+(by)h Fq(footnote_to_html)p Fv(.)396 944 y(Note)g(that)h(this)f
+(technique)f(does)h(not)g(w)o(ork)f(if)i(you)e(mak)o(e)h(another)f
+(footnote)f(within)i(a)h(footnote;)d(the)i(second)396
+1052 y(footnote)f(gets)h(allocated)g(b)n(ut)g(not)g(printed.)396
+1274 y Fq(class)44 b(footnote)g(=)486 1371 y(object)g(\(self\))576
+1468 y(inherit)f(shared)576 1662 y(val)h(mutable)g(footnote_number)e(=)
+j(0)576 1857 y(method)e(to_html)h(store)g(ch)h(=)665
+1954 y(let)g(number)e(=)396 2051 y(store)h(#)h(alloc_footnote)d(\(self)
+i(:)h(#shared)f(:)p Fo(>)g Fq(footnote_printer\))e(in)665
+2148 y(let)j(foot_anchor)e(=)396 2245 y("footnote")g(^)i(string_of_int)
+e(number)h(in)665 2342 y(let)h(text_anchor)e(=)396 2439
+y("textnote")g(^)i(string_of_int)e(number)h(in)665 2537
+y(footnote_number)f Fo(<)p Fq(-)h(number;)665 2634 y(output_string)f
+(ch)h(\()h(")p Fo(<)p Fq(a)f(name=\\"")g(^)g(text_anchor)f(^)i("\\")f
+(href=\\"#")g(^)441 2731 y(foot_anchor)f(^)i("\\")p Fo(>)p
+Fq([")e(^)i(string_of_int)e(number)h(^)441 2828 y("])p
+Fo(<)p Fq(/a)p Fo(>)p Fq(")g(\))576 3022 y(method)f(footnote_to_html)g
+(store)h(ch)g(=)665 3119 y(\(*)h(prerequisite:)d(we)j(are)f(in)h(a)f
+(definition)g(list)g Fo(<)p Fq(dl)p Fo(>)p Fq(...)p Fo(<)p
+Fq(/dl)p Fo(>)e Fq(*\))665 3217 y(let)j(foot_anchor)e(=)396
+3314 y("footnote")g(^)i(string_of_int)e(footnote_number)f(in)665
+3411 y(let)j(text_anchor)e(=)396 3508 y("textnote")g(^)i(string_of_int)
+e(footnote_number)f(in)665 3605 y(output_string)h(ch)h(\(")p
+Fo(<)p Fq(dt)p Fo(><)p Fq(a)g(name=\\"")f(^)i(foot_anchor)e(^)h("\\")h
+(href=\\"#")e(^)396 3702 y(text_anchor)g(^)i("\\")p Fo(>)p
+Fq([")f(^)g(string_of_int)f(footnote_number)f(^)396 3799
+y("])p Fo(<)p Fq(/a)p Fo(><)p Fq(/dt)p Fo(>)p Fq(\\n)p
+Fo(<)p Fq(dd)p Fo(>)p Fq("\);)665 3896 y(List.iter)396
+3994 y(\(fun)i(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e
+(store)h(ch\))396 4091 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665
+4188 y(output_string)e(ch)h(\("\\n)p Fo(<)p Fq(/dd)p
+Fo(>)p Fq("\))486 4382 y(end)396 4479 y(;;)p Black 3800
+5278 a Fr(43)p Black eop
+%%Page: 44 44
+44 43 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black -2 583 a Fp(2.4.14.)36 b(The)d(speci\002cation)j(of)e(the)f
+(document)i(model)396 751 y Fv(This)21 b(code)e(sets)i(up)f(the)g(hash)
+g(table)g(that)h(connects)e(element)h(types)g(with)g(the)g(e)o(x)o
+(emplars)f(of)h(the)g(e)o(xtension)f(classes)396 859
+y(that)i(con)m(v)o(ert)d(the)i(elements)g(to)g(HTML.)396
+1039 y Fq(open)44 b(Pxp_yacc)396 1233 y(let)h(tag_map)e(=)486
+1330 y(make_spec_from_alist)576 1427 y(~data_exemplar:\(new)e
+(data_impl)j(\(new)g(only_data\)\))576 1525 y
+(~default_element_exemplar:\(new)39 b(element_impl)k(\(new)h
+(no_markup\)\))576 1622 y(~element_alist:)665 1719 y([)h("readme",)e
+(\(new)h(element_impl)f(\(new)h(readme\)\);)396 1816
+y("sect1",)89 b(\(new)44 b(element_impl)f(\(new)h(sect1\)\);)396
+1913 y("sect2",)89 b(\(new)44 b(element_impl)f(\(new)h(sect2\)\);)396
+2010 y("sect3",)89 b(\(new)44 b(element_impl)f(\(new)h(sect3\)\);)396
+2107 y("title",)89 b(\(new)44 b(element_impl)f(\(new)h(no_markup\)\);)
+396 2205 y("p",)269 b(\(new)44 b(element_impl)f(\(new)h(p\)\);)396
+2302 y("br",)224 b(\(new)44 b(element_impl)f(\(new)h(br\)\);)396
+2399 y("code",)134 b(\(new)44 b(element_impl)f(\(new)h(code\)\);)396
+2496 y("em",)224 b(\(new)44 b(element_impl)f(\(new)h(em\)\);)396
+2593 y("ul",)224 b(\(new)44 b(element_impl)f(\(new)h(ul\)\);)396
+2690 y("li",)224 b(\(new)44 b(element_impl)f(\(new)h(li\)\);)396
+2787 y("footnote",)f(\(new)h(element_impl)f(\(new)h(footnote)g(:)h
+(#shared)e(:)p Fo(>)i Fq(shared\)\);)396 2884 y("a",)269
+b(\(new)44 b(element_impl)f(\(new)h(a\)\);)665 2982 y(])576
+3079 y(\(\))396 3176 y(;;)-2 3678 y Fx(Notes)p Black
+396 3857 a Fv(1.)p Black 70 w(Elements)20 b(may)g(also)g(contain)f
+(processing)g(instructions.)g(Unlik)o(e)h(other)f(document)g(models,)g
+(PXP)i(separates)529 3965 y(processing)e(instructions)g(from)g(the)i
+(rest)f(of)g(the)g(te)o(xt)g(and)g(pro)o(vides)e(a)j(second)e(interf)o
+(ace)h(to)g(access)h(them)529 4073 y(\(method)e Fq(pinstr)p
+Fv(\).)g(Ho)n(we)n(v)o(er)m(,)f(there)h(is)j(a)e(parser)g(option)f(\()p
+Fq(enable_pinstr_nodes)p Fv(\))e(which)i(changes)g(the)529
+4181 y(beha)n(viour)f(of)i(the)g(parser)g(such)g(that)g(e)o(xtra)g
+(nodes)f(for)h(processing)e(instructions)i(are)g(included)e(into)i(the)
+h(tree.)529 4320 y Fi(Furthermore,)e(the)g(tree)g(does)g(normally)h
+(not)f(contain)h(nodes)g(for)e(XML)h(comments;)h(the)o(y)f(are)g
+(ignored)h(by)f(def)o(ault.)g(Again,)529 4417 y(there)g(is)g(an)g
+(option)h(\()p Fh(enable_comment_nodes)p Fi(\))25 b(changing)c(this.)p
+Black 396 4566 a Fv(2.)p Black 70 w(Due)f(to)h(the)f(typing)f(system)h
+(it)h(is)g(more)e(or)h(less)i(impossible)d(to)i(deri)n(v)o(e)d(recursi)
+n(v)o(e)h(classes)i(in)g(O'Caml.)f(T)-7 b(o)20 b(get)529
+4674 y(around)e(this,)j(it)g(is)g(common)d(practice)i(to)g(put)g(the)g
+(modi\002able)f(or)h(e)o(xtensible)f(part)h(of)g(recursi)n(v)o(e)f
+(objects)h(into)529 4782 y(parallel)g(objects.)p Black
+3800 5278 a Fr(44)p Black eop
+%%Page: 45 45
+45 44 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black Black 396 579 a Fv(3.)p Black 70 w(The)g(problem)e(is)k(that)e
+(the)g(subclass)h(is)g(usually)e(not)h(a)h(subtype)e(in)h(this)h(case)f
+(because)g(O'Caml)g(has)h(a)529 687 y(contra)n(v)n(ariant)d(subtyping)g
+(rule.)p Black 3800 5278 a Fr(45)p Black eop
+%%Page: 46 46
+46 45 bop Black Black -2 621 a Fs(Chapter)48 b(3.)f(The)h(objects)g
+(representing)g(the)-2 845 y(document)396 1093 y Fr(This)21
+b(description)e(might)h(be)g(out-of-date)o(.)e(See)i(the)g(module)f
+(interface)h(\002les)g(for)h(updated)d(information.)-2
+1470 y Fx(3.1.)39 b(The)g Fb(document)44 b Fx(c)m(lass)396
+1722 y Fq(class)g([)h('ext)f(])h(document)e(:)486 1819
+y(Pxp_types.collect_warnings)d(->)486 1916 y(object)576
+2013 y(method)j(init_xml_version)g(:)h(string)g(->)h(unit)576
+2111 y(method)e(init_root)h(:)g('ext)h(node)f(->)g(unit)576
+2305 y(method)f(xml_version)g(:)i(string)576 2402 y(method)e
+(xml_standalone)g(:)i(bool)576 2499 y(method)e(dtd)i(:)f(dtd)576
+2596 y(method)f(root)i(:)f('ext)g(node)576 2791 y(method)f(encoding)h
+(:)h(Pxp_types.rep_encoding)576 2985 y(method)e(add_pinstr)h(:)g
+(proc_instruction)e(->)j(unit)576 3082 y(method)e(pinstr)h(:)h(string)f
+(->)g(proc_instruction)e(list)576 3179 y(method)h(pinstr_names)g(:)i
+(string)f(list)576 3373 y(method)f(write)h(:)h(Pxp_types.output_stream)
+c(->)k(Pxp_types.encoding)c(->)k(unit)486 3568 y(end)396
+3665 y(;;)396 3856 y Fv(The)20 b(methods)f(be)o(ginning)f(with)i
+Fq(init_)g Fv(are)g(only)g(for)f(internal)h(use)g(of)g(the)g(parser)-5
+b(.)p Black 396 4088 a Ft(\225)p Black 60 w Fq(xml_version)p
+Fv(:)19 b(returns)h(the)g(v)o(ersion)f(string)h(at)g(the)g(be)o
+(ginning)e(of)i(the)g(document.)e(F)o(or)i(e)o(xample,)f("1.0")g(is)479
+4196 y(returned)g(if)h(the)g(document)f(be)o(gins)g(with)h
+Fo(<)p Fq(?xml)44 b(version="1.0"?)p Fo(>)p Fv(.)p Black
+396 4304 a Ft(\225)p Black 60 w Fq(xml_standalone)p Fv(:)19
+b(returns)g(the)h(boolean)f(v)n(alue)g(of)h Fq(standalone)f
+Fv(declaration)g(in)h(the)h(XML)f(declaration.)e(If)479
+4412 y(the)i Fq(standalone)g Fv(attrib)n(ute)f(is)i(missing,)f
+Fq(false)g Fv(is)h(returned.)p Black 396 4520 a Ft(\225)p
+Black 60 w Fq(dtd)p Fv(:)g(returns)e(a)i(reference)d(to)i(the)h(global)
+e(DTD)h(object.)p Black 396 4628 a Ft(\225)p Black 60
+w Fq(root)p Fv(:)g(returns)g(a)g(reference)f(to)h(the)g(root)g
+(element.)p Black 396 4736 a Ft(\225)p Black 60 w Fq(encoding)p
+Fv(:)g(returns)f(the)h(internal)g(encoding)e(of)i(the)g(document.)e
+(This)i(means)g(that)g(all)h(strings)f(of)g(which)g(the)479
+4844 y(document)e(consists)j(are)f(encoded)f(in)h(this)h(character)e
+(set.)p Black 3798 5278 a Fr(46)p Black eop
+%%Page: 47 47
+47 46 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p
+Black 60 w Fq(pinstr)p Fv(:)g(returns)f(the)i(processing)d
+(instructions)i(outside)f(the)h(DTD)h(and)e(outside)h(the)g(root)g
+(element.)f(The)479 687 y(ar)o(gument)f(passed)i(to)h(the)f(method)f
+(names)g(a)i Fr(tar)m(g)o(et)q Fv(,)g(and)e(the)h(method)f(returns)g
+(all)i(instructions)e(with)i(this)g(tar)o(get.)479 795
+y(The)f(tar)o(get)f(is)j(the)e(\002rst)h(w)o(ord)e(inside)h
+Fo(<)p Fq(?)h Fv(and)e Fq(?)p Fo(>)p Fv(.)p Black 396
+903 a Ft(\225)p Black 60 w Fq(pinstr_names)p Fv(:)g(returns)g(the)i
+(names)e(of)h(the)h(processing)d(instructions)p Black
+396 1011 a Ft(\225)p Black 60 w Fq(add_pinstr)p Fv(:)h(adds)h(another)f
+(processing)g(instruction.)f(This)j(method)e(is)i(used)f(by)f(the)h
+(parser)g(itself)h(to)f(enter)g(the)479 1119 y(instructions)f(returned)
+g(by)h Fq(pinstr)p Fv(,)f(b)n(ut)h(you)g(can)g(also)g(enter)g
+(additional)f(instructions.)p Black 396 1226 a Ft(\225)p
+Black 60 w Fq(write)p Fv(:)h(writes)h(the)f(document)e(to)j(the)f
+(passed)g(stream)g(as)h(XML)f(te)o(xt)g(using)g(the)g(passed)g(\(e)o
+(xternal\))e(encoding.)479 1334 y(The)i(generated)f(te)o(xt)h(is)h(al)o
+(w)o(ays)f(v)n(alid)g(XML)g(and)g(can)g(be)g(parsed)g(by)f(PXP;)i(ho)n
+(we)n(v)o(er)m(,)d(the)i(te)o(xt)g(is)h(badly)479 1442
+y(formatted)e(\(this)h(is)h(not)f(a)h(pretty)e(printer\).)-2
+1861 y Fx(3.2.)39 b(The)g(c)m(lass)g(type)g Fb(node)396
+2041 y Fv(From)20 b Fq(Pxp_document)p Fv(:)396 2221 y
+Fq(type)44 b(node_type)g(=)486 2318 y(T_data)396 2415
+y(|)h(T_element)e(of)i(string)396 2512 y(|)g(T_super_root)396
+2609 y(|)g(T_pinstr)e(of)i(string)396 2706 y(|)g(T_comment)396
+2804 y Fn(and)g(some)f(other,)g(reserved)f(types)396
+2901 y Fq(;;)396 3095 y(class)h(type)g([)h('ext)f(])h(node)f(=)486
+3192 y(object)g(\('self\))576 3289 y(constraint)f('ext)h(=)h('ext)f
+(node)g(#extension)576 3484 y(\(*)g Fn(General)g(observers)f
+Fq(*\))576 3678 y(method)g(extension)h(:)g('ext)576 3775
+y(method)f(dtd)i(:)f(dtd)576 3872 y(method)f(parent)h(:)h('ext)f(node)
+576 3969 y(method)f(root)i(:)f('ext)g(node)576 4066 y(method)f
+(sub_nodes)h(:)g('ext)h(node)f(list)576 4164 y(method)f(iter_nodes)h(:)
+g(\('ext)g(node)g(-)p Fo(>)h Fq(unit\))f(-)p Fo(>)g Fq(unit)576
+4261 y(method)f(iter_nodes_sibl)g(:)889 4358 y(\('ext)h(node)h(option)e
+(-)p Fo(>)i Fq('ext)f(node)g(-)p Fo(>)g Fq('ext)h(node)f(option)g(-)p
+Fo(>)g Fq(unit\))g(-)396 4455 y Fo(>)h Fq(unit)576 4552
+y(method)e(node_type)h(:)g(node_type)576 4649 y(method)f(encoding)h(:)h
+(Pxp_types.rep_encoding)576 4746 y(method)e(data)i(:)f(string)576
+4843 y(method)f(position)h(:)h(\(string)e(*)i(int)f(*)h(int\))p
+Black 3797 5278 a Fr(47)p Black eop
+%%Page: 48 48
+48 47 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 576 579 a Fq(method)43
+b(comment)h(:)h(string)f(option)576 676 y(method)f(pinstr)h(:)h(string)
+f(-)p Fo(>)g Fq(proc_instruction)e(list)576 773 y(method)h
+(pinstr_names)g(:)i(string)f(list)576 870 y(method)f(write)h(:)h
+(Pxp_types.output_stream)c(->)k(Pxp_types.encoding)c(->)k(unit)576
+1065 y(\(*)f Fn(Attribute)f(observers)h Fq(*\))576 1259
+y(method)f(attribute)h(:)g(string)g(-)p Fo(>)h Fq(Pxp_types.att_value)
+576 1356 y(method)e(required_string_attribute)e(:)k(string)f(-)p
+Fo(>)g Fq(string)576 1453 y(method)f(optional_string_attribute)e(:)k
+(string)f(-)p Fo(>)g Fq(string)g(option)576 1550 y(method)f
+(required_list_attribute)e(:)k(string)f(-)p Fo(>)g Fq(string)g(list)576
+1647 y(method)f(optional_list_attribute)e(:)k(string)f(-)p
+Fo(>)g Fq(string)g(list)576 1745 y(method)f(attribute_names)g(:)h
+(string)g(list)576 1842 y(method)f(attribute_type)g(:)i(string)e(-)p
+Fo(>)i Fq(Pxp_types.att_type)576 1939 y(method)e(attributes)h(:)g
+(\(string)g(*)h(Pxp_types.att_value\))c(list)576 2036
+y(method)i(id_attribute_name)f(:)j(string)576 2133 y(method)e
+(id_attribute_value)f(:)j(string)576 2230 y(method)e
+(idref_attribute_names)f(:)i(string)576 2424 y(\(*)g
+Fn(Modifying)f(methods)h Fq(*\))576 2619 y(method)f(add_node)h(:)h
+(?force:bool)e(-)p Fo(>)h Fq('ext)g(node)g(-)p Fo(>)h
+Fq(unit)576 2716 y(method)e(add_pinstr)h(:)g(proc_instruction)e(-)p
+Fo(>)j Fq(unit)576 2813 y(method)e(delete)h(:)h(unit)576
+2910 y(method)e(set_nodes)h(:)g('ext)h(node)f(list)g(-)p
+Fo(>)g Fq(unit)576 3007 y(method)f(quick_set_attributes)f(:)j(\(string)
+e(*)i(Pxp_types.att_value\))c(list)j(-)p Fo(>)h Fq(unit)576
+3104 y(method)e(set_comment)g(:)i(string)f(option)g(-)p
+Fo(>)g Fq(unit)576 3299 y(\(*)g Fn(Cloning)g(methods)f
+Fq(*\))576 3493 y(method)g(orphaned_clone)g(:)i('self)576
+3590 y(method)e(orphaned_flat_clone)f(:)j('self)576 3687
+y(method)e(create_element)g(:)1024 3784 y(?position:\(string)f(*)j(int)
+f(*)h(int\))f(-)p Fo(>)1024 3882 y Fq(dtd)g(-)p Fo(>)h
+Fq(node_type)e(-)p Fo(>)h Fq(\(string)g(*)h(string\))e(list)h(-)p
+Fo(>)1203 3979 y Fq('ext)g(node)576 4076 y(method)f(create_data)g(:)i
+(dtd)f(-)p Fo(>)h Fq(string)f(-)p Fo(>)g Fq('ext)g(node)576
+4173 y(method)f(keep_always_whitespace_mode)e(:)j(unit)576
+4367 y(\(*)g Fn(Validating)f(methods)h Fq(*\))576 4561
+y(method)f(local_validate)g(:)i(?use_dfa:bool)d(->)j(unit)f(->)g(unit)
+576 4756 y(\(*)g(...)g(Internal)g(methods)g(are)g(undocumented.)f(*\))p
+Black 3800 5278 a Fr(48)p Black eop
+%%Page: 49 49
+49 48 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(end)396
+676 y(;;)396 867 y Fv(In)g(the)g(module)f Fq(Pxp_types)g
+Fv(you)h(can)g(\002nd)g(another)e(type)i(de\002nition)f(that)h(is)i
+(important)c(in)j(this)f(conte)o(xt:)396 1047 y Fq(type)44
+b(Pxp_types.att_value)e(=)576 1144 y(Value)223 b(of)44
+b(string)486 1241 y(|)h(Valuelist)e(of)h(string)g(list)486
+1339 y(|)h(Implied_value)396 1436 y(;;)-2 1847 y Fp(3.2.1.)35
+b(The)f(structure)f(of)g(document)i(trees)396 2015 y
+Fv(A)21 b(node)e(represents)g(either)h(an)g(element)g(or)g(a)g
+(character)f(data)h(section.)g(There)g(are)g(tw)o(o)g(classes)h
+(implementing)d(the)396 2122 y(tw)o(o)j(aspects)f(of)g(nodes:)g
+Fq(element_impl)e Fv(and)i Fq(data_impl)p Fv(.)f(The)h(latter)g(class)h
+(does)f(not)g(implement)f(all)i(methods)396 2230 y(because)f(some)g
+(methods)f(do)h(not)g(mak)o(e)f(sense)i(for)e(data)h(nodes.)396
+2380 y(\(Note:)g(PXP)h(also)g(supports)e(a)h(mode)g(which)f(forces)h
+(that)g(processing)f(instructions)g(and)h(comments)f(are)396
+2488 y(represented)g(as)i(nodes)e(of)h(the)g(document)e(tree.)i(Ho)n
+(we)n(v)o(er)m(,)e(these)j(nodes)e(are)h(instances)g(of)g
+Fq(element_impl)f Fv(with)396 2596 y(node)g(types)h Fq(T_pinstr)g
+Fv(and)f Fq(T_comment)p Fv(,)g(respecti)n(v)o(ely)-5
+b(.)18 b(This)j(mode)e(must)h(be)g(e)o(xplicitly)g(con\002gured;)d(the)
+k(basic)396 2704 y(representation)d(kno)n(ws)i(only)f(element)h(and)f
+(data)h(nodes.\))396 2853 y(The)g(follo)n(wing)f(\002gure)g(\()p
+Fr(A)h(tr)m(ee)h(with)g(element)f(nodes,)f(data)g(nodes,)h(and)f
+(attrib)n(utes)p Fv(\))h(sho)n(ws)g(an)g(e)o(xample)f(ho)n(w)h(a)396
+2961 y(tree)g(is)i(constructed)c(from)h(element)h(and)f(data)i(nodes.)e
+(The)h(circular)f(areas)h(represent)f(element)h(nodes)f(whereas)h(the)
+396 3069 y(o)o(v)n(als)f(denote)f(data)i(nodes.)e(Only)h(elements)g
+(may)g(ha)n(v)o(e)g(subnodes;)f(data)h(nodes)g(are)g(al)o(w)o(ays)h
+(lea)n(v)o(es)f(of)h(the)f(tree.)g(The)396 3177 y(subnodes)g(of)h(an)g
+(element)g(can)g(be)g(either)g(element)f(or)h(data)g(nodes;)g(in)g
+(both)f(cases)i(the)g(O'Caml)f(objects)g(storing)f(the)396
+3285 y(nodes)h(ha)n(v)o(e)f(the)i(class)g(type)e Fq(node)p
+Fv(.)396 3434 y(Attrib)n(utes)h(\(the)g(clouds)g(in)g(the)g(picture\))f
+(are)h(not)g(directly)g(inte)o(grated)e(into)i(the)g(tree;)h(there)e
+(is)i(al)o(w)o(ays)g(an)f(e)o(xtra)g(link)396 3542 y(to)h(the)f(attrib)
+n(ute)g(list.)h(This)f(is)h(also)g(true)f(for)f(processing)g
+(instructions)g(\(not)h(sho)n(wn)f(in)h(the)h(picture\).)d(This)j
+(means)396 3650 y(that)g(there)e(are)h(separated)g(access)g(methods)g
+(for)f(attrib)n(utes)h(and)g(processing)f(instructions.)p
+Black 3800 5278 a Fr(49)p Black eop
+%%Page: 50 50
+50 49 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-1.)f(A)i
+(tr)o(ee)e(with)i(element)f(nodes,)h(data)e(nodes,)i(and)f(attrib)n
+(utes)396 2578 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 2578 a @beginspecial 0 @llx 0 @lly
+329 @urx 218 @ury 3290 @rwi @setspecial
+%%BeginDocument: pic/node_term.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_term.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 329 218
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-1.0 251.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+/reencdict 12 dict def /ReEncode { reencdict begin
+/newcodesandnames exch def /newfontname exch def /basefontname exch def
+/basefontdict basefontname findfont def /newfont basefontdict maxlength dict def
+basefontdict { exch dup /FID ne { dup /Encoding eq
+{ exch dup length array copy newfont 3 1 roll put }
+{ exch newfont 3 1 roll put } ifelse } { pop pop } ifelse } forall
+newfont /FontName newfontname put newcodesandnames aload pop
+128 1 255 { newfont /Encoding get exch /.notdef put } for
+newcodesandnames length 2 idiv { newfont /Encoding get 3 1 roll put } repeat
+newfontname newfont definefont pop end } def
+/isovec [
+8#200 /grave 8#201 /acute 8#202 /circumflex 8#203 /tilde
+8#204 /macron 8#205 /breve 8#206 /dotaccent 8#207 /dieresis
+8#210 /ring 8#211 /cedilla 8#212 /hungarumlaut 8#213 /ogonek 8#214 /caron
+8#220 /dotlessi 8#230 /oe 8#231 /OE
+8#240 /space 8#241 /exclamdown 8#242 /cent 8#243 /sterling
+8#244 /currency 8#245 /yen 8#246 /brokenbar 8#247 /section 8#250 /dieresis
+8#251 /copyright 8#252 /ordfeminine 8#253 /guillemotleft 8#254 /logicalnot
+8#255 /endash 8#256 /registered 8#257 /macron 8#260 /degree 8#261 /plusminus
+8#262 /twosuperior 8#263 /threesuperior 8#264 /acute 8#265 /mu 8#266 /paragraph
+8#267 /periodcentered 8#270 /cedilla 8#271 /onesuperior 8#272 /ordmasculine
+8#273 /guillemotright 8#274 /onequarter 8#275 /onehalf
+8#276 /threequarters 8#277 /questiondown 8#300 /Agrave 8#301 /Aacute
+8#302 /Acircumflex 8#303 /Atilde 8#304 /Adieresis 8#305 /Aring
+8#306 /AE 8#307 /Ccedilla 8#310 /Egrave 8#311 /Eacute
+8#312 /Ecircumflex 8#313 /Edieresis 8#314 /Igrave 8#315 /Iacute
+8#316 /Icircumflex 8#317 /Idieresis 8#320 /Eth 8#321 /Ntilde 8#322 /Ograve
+8#323 /Oacute 8#324 /Ocircumflex 8#325 /Otilde 8#326 /Odieresis 8#327 /multiply
+8#330 /Oslash 8#331 /Ugrave 8#332 /Uacute 8#333 /Ucircumflex
+8#334 /Udieresis 8#335 /Yacute 8#336 /Thorn 8#337 /germandbls 8#340 /agrave
+8#341 /aacute 8#342 /acircumflex 8#343 /atilde 8#344 /adieresis 8#345 /aring
+8#346 /ae 8#347 /ccedilla 8#350 /egrave 8#351 /eacute
+8#352 /ecircumflex 8#353 /edieresis 8#354 /igrave 8#355 /iacute
+8#356 /icircumflex 8#357 /idieresis 8#360 /eth 8#361 /ntilde 8#362 /ograve
+8#363 /oacute 8#364 /ocircumflex 8#365 /otilde 8#366 /odieresis 8#367 /divide
+8#370 /oslash 8#371 /ugrave 8#372 /uacute 8#373 /ucircumflex
+8#374 /udieresis 8#375 /yacute 8#376 /thorn 8#377 /ydieresis] def
+/Helvetica-Bold /Helvetica-Bold-iso isovec ReEncode
+/Helvetica /Helvetica-iso isovec ReEncode
+/Helvetica-Oblique /Helvetica-Oblique-iso isovec ReEncode
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5962 m -1000 -1000 l 7537 -1000 l 7537 5962 l cp clip
+ 0.05039 0.05039 sc
+% Polyline
+7.500 slw
+n 1770 2700 m 1665 2700 1665 3045 105 arcto 4 {pop} repeat
+ 1665 3150 2730 3150 105 arcto 4 {pop} repeat
+ 2835 3150 2835 2805 105 arcto 4 {pop} repeat
+ 2835 2700 1770 2700 105 arcto 4 {pop} repeat
+ cp gs col7 0.75 shd ef gr gs col0 s gr
+% Ellipse
+n 2250 1125 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1575 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2925 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 900 2925 242 242 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Polyline
+n 420 3825 m 315 3825 315 4170 105 arcto 4 {pop} repeat
+ 315 4275 1380 4275 105 arcto 4 {pop} repeat
+ 1485 4275 1485 3930 105 arcto 4 {pop} repeat
+ 1485 3825 420 3825 105 arcto 4 {pop} repeat
+ cp gs col7 0.75 shd ef gr gs col0 s gr
+% Polyline
+n 2085 1275 m 1582 1807 l gs col0 s gr
+% Polyline
+n 2407 1297 m 2940 1800 l gs col0 s gr
+% Polyline
+n 1417 2190 m 900 2692 l gs col0 s gr
+% Polyline
+n 1740 2190 m 2257 2700 l gs col0 s gr
+% Polyline
+n 892 3180 m 892 3825 l gs col0 s gr
+% Polyline
+n 45 675 m 6525 675 l 6525 4950 l 45 4950 l cp gs col0 s gr
+% Polyline
+n 2250 3600 m 2263 3597 l 2277 3594 l 2293 3592 l 2309 3589 l 2326 3586 l
+ 2344 3583 l 2362 3580 l 2381 3578 l 2399 3575 l 2418 3572 l
+ 2436 3569 l 2454 3566 l 2471 3563 l 2488 3561 l 2504 3558 l
+ 2520 3555 l 2537 3552 l 2555 3548 l 2571 3545 l 2588 3541 l
+ 2604 3537 l 2621 3533 l 2637 3528 l 2653 3524 l 2669 3520 l
+ 2684 3517 l 2700 3514 l 2715 3512 l 2730 3510 l 2745 3510 l
+ 2762 3511 l 2777 3512 l 2793 3514 l 2807 3517 l 2821 3520 l
+ 2835 3524 l 2849 3528 l 2863 3532 l 2877 3537 l 2893 3542 l
+ 2908 3548 l 2925 3555 l 2938 3561 l 2951 3568 l 2965 3575 l
+ 2978 3584 l 2992 3593 l 3007 3602 l 3021 3612 l 3035 3623 l
+ 3050 3633 l 3064 3643 l 3079 3652 l 3093 3661 l 3108 3670 l
+ 3122 3677 l 3136 3684 l 3150 3690 l 3166 3696 l 3182 3701 l
+ 3198 3706 l 3214 3710 l 3230 3713 l 3246 3716 l 3263 3719 l
+ 3279 3721 l 3295 3724 l 3311 3726 l 3327 3729 l 3343 3731 l
+ 3359 3733 l 3375 3735 l 3391 3736 l 3407 3737 l 3423 3738 l
+ 3439 3738 l 3455 3738 l 3471 3738 l 3488 3737 l 3504 3737 l
+ 3520 3736 l 3536 3736 l 3552 3735 l 3568 3735 l 3584 3735 l
+ 3600 3735 l 3616 3735 l 3632 3735 l 3648 3734 l 3663 3734 l
+ 3678 3733 l 3693 3732 l 3708 3731 l 3723 3730 l 3739 3729 l
+ 3755 3729 l 3771 3729 l 3788 3730 l 3806 3732 l 3825 3735 l
+ 3840 3738 l 3856 3741 l 3874 3745 l 3892 3749 l 3911 3753 l
+ 3931 3757 l 3951 3762 l 3972 3767 l 3993 3772 l 4014 3777 l
+ 4034 3782 l 4054 3787 l 4072 3793 l 4089 3799 l 4105 3805 l
+ 4119 3811 l 4130 3818 l 4140 3825 l 4150 3835 l 4157 3846 l
+ 4161 3858 l 4163 3870 l 4164 3883 l 4163 3897 l 4161 3911 l
+ 4159 3925 l 4156 3939 l 4154 3952 l 4151 3966 l 4148 3979 l
+ 4144 3992 l 4140 4005 l 4135 4018 l 4128 4031 l 4121 4045 l
+ 4112 4058 l 4104 4073 l 4095 4087 l 4085 4101 l 4075 4116 l
+ 4065 4129 l 4055 4143 l 4043 4155 l 4032 4166 l 4019 4176 l
+ 4005 4185 l 3992 4192 l 3978 4197 l 3963 4202 l 3947 4206 l
+ 3930 4210 l 3913 4213 l 3896 4216 l 3878 4218 l 3861 4220 l
+ 3843 4222 l 3825 4224 l 3807 4226 l 3789 4228 l 3771 4229 l
+ 3753 4230 l 3735 4230 l 3717 4230 l 3698 4228 l 3678 4226 l
+ 3659 4224 l 3639 4220 l 3619 4216 l 3598 4212 l 3578 4208 l
+ 3557 4203 l 3536 4199 l 3516 4195 l 3496 4191 l 3477 4189 l
+ 3457 4187 l 3438 4185 l 3420 4185 l 3402 4185 l 3384 4186 l
+ 3367 4188 l 3350 4190 l 3333 4193 l 3317 4196 l 3301 4200 l
+ 3285 4203 l 3269 4207 l 3253 4211 l 3237 4214 l 3220 4218 l
+ 3203 4221 l 3186 4224 l 3168 4227 l 3150 4230 l 3132 4233 l
+ 3113 4236 l 3094 4239 l 3074 4242 l 3055 4246 l 3035 4249 l
+ 3015 4253 l 2995 4257 l 2974 4260 l 2954 4264 l 2934 4267 l
+ 2914 4270 l 2894 4272 l 2874 4274 l 2855 4275 l 2835 4275 l
+ 2815 4275 l 2795 4274 l 2775 4272 l 2755 4270 l 2734 4268 l
+ 2713 4265 l 2692 4262 l 2671 4259 l 2650 4256 l 2630 4252 l
+ 2609 4249 l 2590 4245 l 2571 4242 l 2553 4238 l 2536 4234 l
+ 2520 4230 l 2503 4225 l 2487 4219 l 2473 4213 l 2460 4207 l
+ 2448 4200 l 2437 4192 l 2426 4185 l 2415 4178 l 2404 4170 l
+ 2393 4163 l 2380 4157 l 2368 4151 l 2354 4145 l 2340 4140 l
+ 2325 4135 l 2310 4131 l 2294 4128 l 2277 4125 l 2260 4122 l
+ 2243 4120 l 2225 4118 l 2208 4115 l 2191 4113 l 2174 4110 l
+ 2158 4107 l 2143 4104 l 2128 4100 l 2115 4095 l 2101 4089 l
+ 2087 4083 l 2074 4076 l 2061 4070 l 2049 4063 l 2037 4056 l
+ 2025 4049 l 2014 4042 l 2004 4034 l 1995 4025 l 1987 4016 l
+ 1980 4005 l 1975 3993 l 1972 3980 l 1971 3965 l 1970 3949 l
+ 1971 3932 l 1972 3915 l 1973 3898 l 1974 3881 l 1976 3865 l
+ 1977 3850 l 1978 3837 l 1980 3825 l 1983 3812 l 1986 3801 l
+ 1990 3792 l 1994 3784 l 1998 3776 l 2003 3768 l 2008 3761 l
+ 2013 3752 l 2019 3744 l 2025 3735 l 2032 3726 l 2040 3717 l
+ 2048 3707 l 2057 3698 l 2066 3688 l 2075 3678 l 2084 3669 l
+ 2094 3660 l 2104 3652 l 2115 3645 l 2127 3639 l 2138 3633 l
+ 2150 3628 l 2162 3624 l 2174 3620 l 2186 3617 l 2200 3613 l
+ 2214 3609 l 2231 3604 l cp gs col0 s gr
+% Polyline
+n 3645 1080 m 3660 1077 l 3677 1074 l 3694 1071 l 3713 1068 l 3733 1065 l
+ 3754 1063 l 3775 1060 l 3798 1058 l 3820 1056 l 3843 1053 l
+ 3866 1051 l 3889 1049 l 3912 1047 l 3934 1045 l 3955 1043 l
+ 3976 1041 l 3996 1039 l 4015 1038 l 4033 1036 l 4050 1035 l
+ 4071 1034 l 4090 1033 l 4109 1032 l 4127 1032 l 4144 1031 l
+ 4161 1031 l 4177 1031 l 4193 1031 l 4209 1031 l 4225 1031 l
+ 4241 1031 l 4257 1032 l 4273 1032 l 4289 1033 l 4304 1034 l
+ 4320 1035 l 4337 1037 l 4354 1039 l 4371 1041 l 4387 1044 l
+ 4403 1047 l 4419 1050 l 4435 1053 l 4450 1057 l 4466 1060 l
+ 4481 1063 l 4497 1067 l 4513 1071 l 4529 1075 l 4545 1080 l
+ 4561 1085 l 4577 1091 l 4592 1097 l 4607 1103 l 4622 1110 l
+ 4637 1118 l 4651 1125 l 4666 1132 l 4681 1140 l 4697 1147 l
+ 4713 1153 l 4731 1159 l 4750 1165 l 4770 1170 l 4787 1174 l
+ 4804 1177 l 4823 1180 l 4842 1182 l 4863 1184 l 4884 1186 l
+ 4906 1188 l 4928 1189 l 4950 1190 l 4972 1192 l 4994 1193 l
+ 5016 1195 l 5037 1197 l 5058 1200 l 5077 1203 l 5096 1206 l
+ 5113 1210 l 5130 1215 l 5148 1221 l 5165 1228 l 5181 1235 l
+ 5197 1242 l 5212 1250 l 5228 1259 l 5243 1267 l 5257 1276 l
+ 5272 1285 l 5286 1294 l 5299 1303 l 5312 1312 l 5324 1322 l
+ 5336 1331 l 5346 1340 l 5355 1350 l 5365 1363 l 5373 1378 l
+ 5380 1392 l 5386 1408 l 5390 1424 l 5394 1440 l 5398 1456 l
+ 5401 1472 l 5402 1488 l 5403 1502 l 5403 1517 l 5400 1530 l
+ 5395 1543 l 5389 1555 l 5381 1568 l 5372 1580 l 5363 1592 l
+ 5354 1604 l 5343 1616 l 5331 1627 l 5318 1638 l 5303 1648 l
+ 5286 1657 l 5265 1665 l 5251 1669 l 5235 1673 l 5219 1677 l
+ 5201 1680 l 5182 1683 l 5162 1685 l 5141 1688 l 5119 1690 l
+ 5097 1692 l 5075 1694 l 5053 1696 l 5030 1697 l 5008 1699 l
+ 4986 1701 l 4964 1703 l 4943 1704 l 4921 1706 l 4901 1707 l
+ 4880 1709 l 4860 1710 l 4840 1711 l 4819 1712 l 4799 1713 l
+ 4779 1713 l 4758 1713 l 4738 1714 l 4717 1714 l 4697 1714 l
+ 4676 1714 l 4655 1714 l 4635 1714 l 4614 1714 l 4594 1714 l
+ 4573 1714 l 4553 1713 l 4533 1713 l 4513 1713 l 4494 1712 l
+ 4474 1711 l 4455 1710 l 4434 1709 l 4413 1707 l 4392 1705 l
+ 4372 1703 l 4351 1701 l 4331 1698 l 4311 1695 l 4291 1692 l
+ 4271 1690 l 4251 1687 l 4231 1684 l 4211 1681 l 4191 1678 l
+ 4172 1675 l 4152 1673 l 4133 1670 l 4114 1668 l 4095 1665 l
+ 4074 1662 l 4053 1659 l 4033 1657 l 4012 1654 l 3992 1651 l
+ 3972 1648 l 3951 1645 l 3931 1643 l 3911 1640 l 3891 1637 l
+ 3872 1634 l 3852 1631 l 3833 1628 l 3815 1626 l 3797 1623 l
+ 3780 1620 l 3761 1617 l 3743 1614 l 3725 1611 l 3708 1608 l
+ 3692 1605 l 3675 1602 l 3659 1600 l 3643 1597 l 3627 1594 l
+ 3612 1591 l 3597 1587 l 3582 1584 l 3568 1580 l 3555 1575 l
+ 3541 1569 l 3527 1563 l 3514 1556 l 3501 1550 l 3489 1543 l
+ 3477 1536 l 3465 1529 l 3454 1522 l 3444 1514 l 3435 1505 l
+ 3427 1496 l 3420 1485 l 3415 1473 l 3412 1460 l 3411 1445 l
+ 3410 1430 l 3411 1414 l 3412 1397 l 3413 1380 l 3414 1364 l
+ 3416 1348 l 3417 1333 l 3418 1318 l 3420 1305 l 3423 1290 l
+ 3425 1275 l 3428 1261 l 3431 1247 l 3434 1233 l 3437 1220 l
+ 3442 1207 l 3447 1194 l 3455 1182 l 3465 1170 l 3474 1162 l
+ 3483 1155 l 3493 1148 l 3504 1141 l 3515 1134 l 3526 1127 l
+ 3538 1121 l 3550 1114 l 3563 1108 l 3577 1102 l 3591 1096 l
+ 3607 1090 l 3625 1085 l cp gs col0 s gr
+% Polyline
+n 2475 1215 m 2477 1217 l 2482 1221 l 2491 1229 l 2503 1239 l 2517 1252 l
+ 2534 1267 l 2552 1282 l 2570 1296 l 2588 1310 l 2605 1322 l
+ 2621 1332 l 2638 1342 l 2655 1350 l 2669 1356 l 2684 1362 l
+ 2700 1368 l 2717 1374 l 2734 1380 l 2752 1386 l 2770 1392 l
+ 2789 1398 l 2808 1403 l 2827 1409 l 2846 1415 l 2865 1420 l
+ 2884 1425 l 2902 1429 l 2920 1433 l 2937 1436 l 2954 1438 l
+ 2970 1440 l 2988 1441 l 3006 1441 l 3024 1440 l 3041 1439 l
+ 3059 1437 l 3076 1434 l 3094 1431 l 3111 1428 l 3129 1425 l
+ 3146 1421 l 3162 1417 l 3179 1414 l 3195 1409 l 3211 1405 l
+ 3226 1400 l 3240 1395 l 3256 1388 l 3271 1380 l 3287 1370 l
+ 3304 1358 l 3322 1344 l 3340 1329 l 3359 1314 l 3376 1299 l
+ 3391 1286 l 3404 1275 l 3412 1267 l 3418 1262 l 3420 1260 l gs col0 s gr
+% Polyline
+n 1125 3060 m 1126 3063 l 1127 3068 l 1129 3078 l 1132 3093 l 1136 3112 l
+ 1141 3135 l 1146 3162 l 1153 3190 l 1159 3219 l 1166 3248 l
+ 1173 3275 l 1180 3301 l 1187 3324 l 1193 3345 l 1200 3364 l
+ 1207 3381 l 1215 3397 l 1224 3414 l 1234 3429 l 1245 3444 l
+ 1256 3459 l 1267 3473 l 1279 3486 l 1291 3499 l 1304 3512 l
+ 1316 3525 l 1329 3537 l 1342 3550 l 1355 3562 l 1368 3574 l
+ 1382 3585 l 1396 3596 l 1410 3607 l 1425 3617 l 1441 3626 l
+ 1457 3635 l 1473 3644 l 1490 3653 l 1507 3661 l 1524 3669 l
+ 1542 3677 l 1559 3685 l 1577 3692 l 1595 3700 l 1613 3706 l
+ 1631 3713 l 1649 3718 l 1668 3723 l 1687 3727 l 1704 3730 l
+ 1723 3732 l 1743 3733 l 1764 3734 l 1788 3734 l 1814 3733 l
+ 1841 3732 l 1869 3731 l 1898 3729 l 1926 3727 l 1952 3725 l
+ 1975 3724 l 1993 3722 l 2008 3721 l 2017 3721 l 2022 3720 l
+ 2025 3720 l gs col0 s gr
+/Helvetica-iso ff 180.00 scf sf
+3600 1260 m
+gs 1 -1 sc (attributes:) col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+3600 1485 m
+gs 1 -1 sc ("att" -> Value "apple") col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+2250 3780 m
+gs 1 -1 sc (attributes:) col0 sh gr
+/Helvetica-Oblique-iso ff 180.00 scf sf
+390 4725 m
+gs 1 -1 sc (An orange Cherries ) col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+2250 4005 m
+gs 1 -1 sc ("att" -> Value "orange") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+1815 3015 m
+gs 1 -1 sc ("Cherries") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+375 4125 m
+gs 1 -1 sc ("An orange") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+750 2985 m
+gs 1 -1 sc () col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+1410 2085 m
+gs 1 -1 sc () col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+2790 2070 m
+gs 1 -1 sc () col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+2100 1200 m
+gs 1 -1 sc () col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 2578 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 2578 a 357 x Fv(Only)g(elements,)g(data)g
+(sections,)g(attrib)n(utes)g(and)g(processing)e(instructions)i(\(and)f
+(comments,)g(if)h(con\002gured\))e(can,)396 3043 y(directly)i(or)g
+(indirectly)-5 b(,)18 b(occur)h(in)h(the)h(document)d(tree.)i(It)g(is)h
+(impossible)f(to)g(add)g(entity)g(references)f(to)h(the)g(tree;)g(if)
+396 3151 y(the)g(parser)g(\002nds)g(such)g(a)h(reference,)d(not)i(the)g
+(reference)f(as)i(such)f(b)n(ut)g(the)g(referenced)e(te)o(xt)i(\(i.e.)g
+(the)g(tree)396 3259 y(representing)e(the)j(structured)d(te)o(xt\))i
+(is)h(included)e(in)h(the)g(tree.)396 3409 y(Note)g(that)h(the)f
+(parser)f(collapses)i(as)g(much)e(data)h(material)g(into)g(one)f(data)h
+(node)f(as)i(possible)f(such)g(that)g(there)g(are)396
+3517 y(normally)f(ne)n(v)o(er)g(tw)o(o)h(adjacent)f(data)i(nodes.)e
+(This)h(in)m(v)n(ariant)f(is)i(enforced)d(e)n(v)o(en)h(if)i(data)f
+(material)f(is)j(included)c(by)396 3625 y(entity)i(references)f(or)h
+(CD)m(A)-9 b(T)h(A)20 b(sections,)g(or)g(if)h(a)f(data)g(sequence)f(is)
+j(interrupted)c(by)h(comments.)g(So)i Fq(a)44 b(&)g(b)396
+3732 y Fo(<)p Fq(-)h(comment)e(-)p Fo(>)i Fq(c)f Fo(<)p
+Fq(![CDATA[)g Fo(<>)g Fq(d]])p Fo(>)20 b Fv(is)h(represented)d(by)i
+(only)g(one)f(data)h(node,)f(for)h(instance.)396 3840
+y(Ho)n(we)n(v)o(er)m(,)e(you)i(can)g(create)g(document)e(trees)i
+(manually)f(which)h(break)f(this)i(in)m(v)n(ariant;)d(it)j(is)g(only)f
+(the)g(w)o(ay)g(the)396 3948 y(parser)g(forms)f(the)h(tree.)p
+Black 3800 5278 a Fr(50)p Black eop
+%%Page: 51 51
+51 50 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-2.)f
+(Nodes)h(ar)o(e)g(doubly)g(link)o(ed)i(tr)o(ees)396 1537
+y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 1537 a @beginspecial 0 @llx 0 @lly 138 @urx 93
+@ury 1380 @rwi @setspecial
+%%BeginDocument: pic/node_general.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_general.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 138 93
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-22.0 126.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 3487 m -1000 -1000 l 4162 -1000 l 4162 3487 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 2025 2025 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1350 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2700 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2025 1125 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+1743 1345 m 1845 1275 l 1788 1385 l 1877 1284 l 1832 1244 l cp
+clip
+n 1380 1800 m 1845 1275 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1743 1345 m 1845 1275 l 1788 1385 l 1765 1365 l 1743 1345 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+1384 1745 m 1282 1815 l 1339 1705 l 1250 1807 l 1295 1846 l cp
+clip
+n 1815 1207 m 1282 1815 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1384 1745 m 1282 1815 l 1339 1705 l 1361 1725 l 1384 1745 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2025 1470 m 2055 1350 l 2085 1470 l 2085 1335 l 2025 1335 l cp
+clip
+n 2055 1792 m 2055 1350 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2025 1470 m 2055 1350 l 2085 1470 l 2055 1470 l 2025 1470 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2010 1687 m 1980 1807 l 1950 1687 l 1950 1822 l 2010 1822 l cp
+clip
+n 1980 1350 m 1980 1807 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2010 1687 m 1980 1807 l 1950 1687 l 1980 1687 l 2010 1687 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2511 1750 m 2550 1867 l 2461 1782 l 2533 1896 l 2583 1864 l cp
+clip
+n 2190 1297 m 2550 1867 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2511 1750 m 2550 1867 l 2461 1782 l 2486 1766 l 2511 1750 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2262 1353 m 2220 1237 l 2312 1320 l 2237 1208 l 2187 1241 l cp
+clip
+n 2602 1807 m 2220 1237 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2262 1353 m 2220 1237 l 2312 1320 l 2287 1337 l 2262 1353 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+n 450 675 m 3150 675 l 3150 2475 l 450 2475 l cp gs col0 s gr
+/Courier ff 150.00 scf sf
+2377 1342 m
+gs 1 -1 sc (parent) col0 sh gr
+/Courier ff 150.00 scf sf
+645 1628 m
+gs 1 -1 sc (sub_nodes) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 1537 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 1537 a 357 x Fv(The)e(node)f(tree)h(has)h
+(links)f(in)g(both)g(directions:)f(Ev)o(ery)g(node)g(has)h(a)h(link)f
+(to)g(its)i(parent)d(\(if)h(an)o(y\),)f(and)g(it)i(has)g(links)f(to)396
+2002 y(the)g(subnodes)f(\(see)i(\002gure)e Fr(Nodes)h(ar)m(e)h(doubly)d
+(link)o(ed)i(tr)m(ees)p Fv(\).)h(Ob)o(viously)-5 b(,)18
+b(this)i(doubly-link)o(ed)d(structure)396 2110 y(simpli\002es)k(the)f
+(na)n(vigation)e(in)j(the)f(tree;)g(b)n(ut)g(has)h(also)f(some)g
+(consequences)f(for)g(the)h(possible)g(operations)f(on)h(trees.)396
+2259 y(Because)h(e)n(v)o(ery)d(node)i(must)g(ha)n(v)o(e)f(at)i(most)f
+Fr(one)g Fv(parent)f(node,)g(operations)g(are)h(ille)o(gal)g(if)g(the)o
+(y)f(violate)h(this)396 2367 y(condition.)e(The)i(follo)n(wing)f
+(\002gure)g(\()p Fr(A)h(node)g(can)f(only)h(be)g(added)f(if)i(it)g(is)g
+(a)f(r)l(oot)q Fv(\))g(sho)n(ws)h(on)e(the)i(left)f(side)h(that)f(node)
+396 2475 y Fq(y)h Fv(is)g(added)e(to)h Fq(x)h Fv(as)g(ne)n(w)f(subnode)
+e(which)i(is)h(allo)n(wed)f(because)f Fq(y)i Fv(does)f(not)g(ha)n(v)o
+(e)f(a)i(parent)e(yet.)h(The)g(right)f(side)i(of)396
+2583 y(the)f(picture)g(illustrates)g(what)h(w)o(ould)e(happen)g(if)h
+Fq(y)h Fv(had)e(a)i(parent)e(node;)g(this)i(is)g(ille)o(gal)f(because)f
+Fq(y)i Fv(w)o(ould)e(ha)n(v)o(e)h(tw)o(o)396 2691 y(parents)g(after)g
+(the)g(operation.)396 2923 y Fu(Figur)o(e)g(3-3.)f(A)i(node)f(can)g
+(only)g(be)h(added)g(if)f(it)h(is)g(a)f(r)o(oot)396 4165
+y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 4165 a @beginspecial 0 @llx 0 @lly 422 @urx 127
+@ury 4220 @rwi @setspecial
+%%BeginDocument: pic/node_add.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_add.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 422 127
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-33.0 171.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 4387 m -1000 -1000 l 10012 -1000 l 10012 4387 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 6141 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6141 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5426 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6856 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7571 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8524 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8047 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1866 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1866 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1151 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2581 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3296 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 4249 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3772 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8325 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+5507 1945 m 5402 2017 l 5460 1904 l 5369 2008 l 5415 2049 l cp
+clip
+n 5910 1440 m 5402 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 5507 1945 m 5402 2017 l 5460 1904 l 5484 1924 l 5507 1945 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6134 1902 m 6101 2025 l 6072 1901 l 6070 2039 l 6132 2041 l cp
+clip
+n 6109 1590 m 6101 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6134 1902 m 6101 2025 l 6072 1901 l 6103 1901 l 6134 1902 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6649 1952 m 6697 2070 l 6599 1989 l 6681 2100 l 6731 2064 l cp
+clip
+n 6307 1537 m 6697 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6649 1952 m 6697 2070 l 6599 1989 l 6624 1970 l 6649 1952 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+7696 2606 m 7602 2692 l 7645 2572 l 7568 2687 l 7619 2722 l cp
+clip
+n 7832 2347 m 7602 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 7696 2606 m 7602 2692 l 7645 2572 l 7671 2589 l 7696 2606 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8306 2632 m 8349 2752 l 8255 2666 l 8332 2782 l 8383 2747 l cp
+clip
+n 8150 2452 m 8349 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 8306 2632 m 8349 2752 l 8255 2666 l 8281 2649 l 8306 2632 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+5853 1564 m 5958 1492 l 5899 1605 l 5991 1501 l 5945 1460 l cp
+clip
+n 5490 2017 m 5958 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 5853 1564 m 5958 1492 l 5899 1605 l 5876 1584 l 5853 1564 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6140 1698 m 6173 1575 l 6201 1699 l 6204 1561 l 6142 1559 l cp
+clip
+n 6164 2010 m 6173 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6140 1698 m 6173 1575 l 6201 1699 l 6170 1699 l 6140 1698 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6404 1588 m 6355 1470 l 6454 1551 l 6371 1440 l 6321 1476 l cp
+clip
+n 6768 2025 m 6355 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6404 1588 m 6355 1470 l 6454 1551 l 6429 1569 l 6404 1588 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+7784 2499 m 7880 2415 l 7835 2534 l 7914 2420 l 7863 2385 l cp
+clip
+n 7673 2715 m 7880 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 7784 2499 m 7880 2415 l 7835 2534 l 7810 2517 l 7784 2499 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+8263 2535 m 8222 2415 l 8315 2502 l 8240 2386 l 8188 2419 l cp
+clip
+n 8412 2707 m 8222 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 8263 2535 m 8222 2415 l 8315 2502 l 8289 2519 l 8263 2535 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+1232 1945 m 1127 2017 l 1185 1904 l 1094 2008 l 1140 2049 l cp
+clip
+n 1635 1440 m 1127 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1232 1945 m 1127 2017 l 1185 1904 l 1209 1924 l 1232 1945 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+1859 1902 m 1826 2025 l 1797 1901 l 1795 2039 l 1857 2041 l cp
+clip
+n 1834 1590 m 1826 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1859 1902 m 1826 2025 l 1797 1901 l 1828 1902 l 1859 1902 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2374 1952 m 2422 2070 l 2324 1989 l 2406 2100 l 2456 2064 l cp
+clip
+n 2032 1537 m 2422 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2374 1952 m 2422 2070 l 2324 1989 l 2349 1970 l 2374 1952 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+3421 2606 m 3327 2692 l 3370 2572 l 3293 2687 l 3344 2722 l cp
+clip
+n 3557 2347 m 3327 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 3421 2606 m 3327 2692 l 3370 2572 l 3396 2589 l 3421 2606 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+4031 2632 m 4074 2752 l 3980 2666 l 4057 2782 l 4108 2747 l cp
+clip
+n 3875 2452 m 4074 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 4031 2632 m 4074 2752 l 3980 2666 l 4006 2649 l 4031 2632 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+1578 1564 m 1683 1492 l 1624 1605 l 1716 1501 l 1670 1460 l cp
+clip
+n 1215 2017 m 1683 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1578 1564 m 1683 1492 l 1624 1605 l 1601 1584 l 1578 1564 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+1865 1698 m 1898 1575 l 1926 1699 l 1929 1561 l 1867 1559 l cp
+clip
+n 1889 2010 m 1898 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1865 1698 m 1898 1575 l 1926 1699 l 1895 1698 l 1865 1698 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2129 1588 m 2080 1470 l 2179 1551 l 2096 1440 l 2046 1476 l cp
+clip
+n 2493 2025 m 2080 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2129 1588 m 2080 1470 l 2179 1551 l 2154 1569 l 2129 1588 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+3509 2499 m 3605 2415 l 3560 2534 l 3639 2420 l 3588 2385 l cp
+clip
+n 3398 2715 m 3605 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 3509 2499 m 3605 2415 l 3560 2534 l 3535 2517 l 3509 2499 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+3988 2535 m 3947 2415 l 4040 2502 l 3965 2386 l 3913 2419 l cp
+clip
+n 4137 2707 m 3947 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 3988 2535 m 3947 2415 l 4040 2502 l 4014 2519 l 3988 2535 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+ [60] 0 sd
+n 6387 1372 m 8023 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd
+% Polyline
+n 4950 900 m 9000 900 l 9000 3375 l 4950 3375 l cp gs col0 s gr
+% Polyline
+ [60] 0 sd
+n 2112 1372 m 3748 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd
+% Polyline
+n 675 900 m 4725 900 l 4725 3375 l 675 3375 l cp gs col0 s gr
+% Polyline
+gs clippath
+8119 1904 m 8055 2010 l 8061 1886 l 8022 2016 l 8079 2033 l cp
+clip
+n 8197 1545 m 8055 2010 l gs col0 s gr gr
+
+% arrowhead
+n 8119 1904 m 8055 2010 l 8061 1886 l 8090 1895 l 8119 1904 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8214 1695 m 8280 1590 l 8271 1713 l 8313 1585 l 8256 1566 l cp
+clip
+n 8137 2025 m 8280 1590 l gs col0 s gr gr
+
+% arrowhead
+n 8214 1695 m 8280 1590 l 8271 1713 l 8243 1704 l 8214 1695 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+30.000 slw
+gs clippath
+7687 2205 m 7502 2333 l 7594 2129 l 7410 2351 l 7503 2428 l cp
+clip
+n 7875 1500 m 7620 1965 l 7845 1920 l 7485 2355 l gs col0 s gr gr
+
+% arrowhead
+15.000 slw
+n 7687 2205 m 7502 2333 l 7594 2129 l 7618 2195 l 7687 2205 l cp gs 0.00 setgray ef gr col0 s
+/Courier-Bold ff 195.00 scf sf
+6094 1379 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 195.00 scf sf
+7991 2265 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 195.00 scf sf
+1819 1379 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 195.00 scf sf
+3716 2265 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier ff 180.00 scf sf
+6459 1335 m
+gs 1 -1 sc (x # add_node y) col0 sh gr
+/Courier ff 180.00 scf sf
+2214 1365 m
+gs 1 -1 sc (x # add_node y) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 4165 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 4165 a 357 x Fv(The)g("delete")g(operation)
+e(simply)i(remo)o(v)o(es)f(the)h(links)g(between)f(tw)o(o)i(nodes.)e
+(In)h(the)g(picture)f(\()p Fr(A)i(deleted)e(node)396
+4629 y(becomes)h(the)g(r)l(oot)g(of)h(the)f(subtr)m(ee)p
+Fv(\))g(the)g(node)f Fq(x)i Fv(is)g(deleted)e(from)h(the)g(list)h(of)f
+(subnodes)f(of)h Fq(y)p Fv(.)g(After)g(that,)g Fq(x)396
+4737 y Fv(becomes)g(the)g(root)f(of)h(the)g(subtree)g(starting)g(at)g
+(this)h(node.)p Black 3800 5278 a Fr(51)p Black eop
+%%Page: 52 52
+52 51 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-4.)f(A)i
+(deleted)f(node)g(becomes)h(the)f(r)o(oot)f(of)h(the)g(subtr)o(ee)396
+1912 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 1912 a @beginspecial 0 @llx 0 @lly 388 @urx
+138 @ury 3880 @rwi @setspecial
+%%BeginDocument: pic/node_delete.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_delete.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 388 138
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-78.0 205.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5062 m -1000 -1000 l 10237 -1000 l 10237 5062 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 2700 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2250 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3150 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+2322 3272 m 2235 3360 l 2271 3242 l 2202 3358 l 2253 3388 l cp
+clip
+n 2535 2857 m 2235 3360 l gs col0 s gr gr
+
+% arrowhead
+n 2322 3272 m 2235 3360 l 2271 3242 l 2296 3257 l 2322 3272 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2978 3298 m 3000 3420 l 2924 3323 l 2979 3446 l 3034 3421 l cp
+clip
+n 2782 2932 m 3000 3420 l gs col0 s gr gr
+
+% arrowhead
+n 2978 3298 m 3000 3420 l 2924 3323 l 2951 3310 l 2978 3298 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2500 2998 m 2587 2910 l 2552 3029 l 2620 2912 l 2569 2882 l cp
+clip
+n 2317 3367 m 2587 2910 l gs col0 s gr gr
+
+% arrowhead
+n 2500 2998 m 2587 2910 l 2552 3029 l 2526 3013 l 2500 2998 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2864 3009 m 2842 2887 l 2918 2984 l 2863 2861 l 2808 2886 l cp
+clip
+n 3060 3375 m 2842 2887 l gs col0 s gr gr
+
+% arrowhead
+n 2864 3009 m 2842 2887 l 2918 2984 l 2891 2997 l 2864 3009 l cp gs col7 1.00 shd ef gr col0 s
+% Ellipse
+n 2700 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2025 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3375 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6345 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5670 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7020 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8325 1800 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7875 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8775 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+2707 2152 m 2737 2032 l 2767 2152 l 2767 2017 l 2707 2017 l cp
+clip
+n 2737 2460 m 2737 2032 l gs col0 s gr gr
+
+% arrowhead
+n 2707 2152 m 2737 2032 l 2767 2152 l 2737 2152 l 2707 2152 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2692 2347 m 2662 2467 l 2632 2347 l 2632 2482 l 2692 2482 l cp
+clip
+n 2662 2032 m 2662 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2692 2347 m 2662 2467 l 2632 2347 l 2662 2347 l 2692 2347 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+1 slj
+60.000 slw
+n 4050 2610 m 4725 2610 l gs col0 s gr
+% Polyline
+n 4050 2745 m 4725 2745 l gs col0 s gr
+% Polyline
+1 slc
+n 4500 2385 m 4950 2655 l 4500 2970 l gs col0 s gr
+% Polyline
+0 slj
+0 slc
+7.500 slw
+gs clippath
+2125 2394 m 2025 2467 l 2078 2355 l 1992 2459 l 2039 2498 l cp
+clip
+n 2490 1905 m 2025 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2125 2394 m 2025 2467 l 2078 2355 l 2101 2375 l 2125 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+3158 2426 m 3202 2542 l 3109 2461 l 3186 2571 l 3235 2537 l cp
+clip
+n 2827 2002 m 3202 2542 l gs col0 s gr gr
+
+% arrowhead
+n 3158 2426 m 3202 2542 l 3109 2461 l 3134 2443 l 3158 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2436 2039 m 2535 1965 l 2482 2077 l 2568 1972 l 2521 1934 l cp
+clip
+n 2115 2475 m 2535 1965 l gs col0 s gr gr
+
+% arrowhead
+n 2436 2039 m 2535 1965 l 2482 2077 l 2459 2058 l 2436 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2916 2073 m 2872 1957 l 2965 2038 l 2888 1928 l 2839 1962 l cp
+clip
+n 3255 2505 m 2872 1957 l gs col0 s gr gr
+
+% arrowhead
+n 2916 2073 m 2872 1957 l 2965 2038 l 2941 2055 l 2916 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+5770 2394 m 5670 2467 l 5723 2355 l 5637 2459 l 5684 2498 l cp
+clip
+n 6135 1905 m 5670 2467 l gs col0 s gr gr
+
+% arrowhead
+n 5770 2394 m 5670 2467 l 5723 2355 l 5746 2375 l 5770 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6803 2426 m 6847 2542 l 6754 2461 l 6831 2571 l 6880 2537 l cp
+clip
+n 6472 2002 m 6847 2542 l gs col0 s gr gr
+
+% arrowhead
+n 6803 2426 m 6847 2542 l 6754 2461 l 6779 2443 l 6803 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6081 2039 m 6180 1965 l 6127 2077 l 6213 1972 l 6166 1934 l cp
+clip
+n 5760 2475 m 6180 1965 l gs col0 s gr gr
+
+% arrowhead
+n 6081 2039 m 6180 1965 l 6127 2077 l 6104 2058 l 6081 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6561 2073 m 6517 1957 l 6610 2038 l 6533 1928 l 6484 1962 l cp
+clip
+n 6900 2505 m 6517 1957 l gs col0 s gr gr
+
+% arrowhead
+n 6561 2073 m 6517 1957 l 6610 2038 l 6586 2055 l 6561 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+7947 2372 m 7860 2460 l 7896 2342 l 7827 2458 l 7878 2488 l cp
+clip
+n 8160 1957 m 7860 2460 l gs col0 s gr gr
+
+% arrowhead
+n 7947 2372 m 7860 2460 l 7896 2342 l 7921 2357 l 7947 2372 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8603 2398 m 8625 2520 l 8549 2423 l 8604 2546 l 8659 2521 l cp
+clip
+n 8407 2032 m 8625 2520 l gs col0 s gr gr
+
+% arrowhead
+n 8603 2398 m 8625 2520 l 8549 2423 l 8576 2410 l 8603 2398 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8125 2098 m 8212 2010 l 8177 2129 l 8245 2012 l 8194 1982 l cp
+clip
+n 7942 2467 m 8212 2010 l gs col0 s gr gr
+
+% arrowhead
+n 8125 2098 m 8212 2010 l 8177 2129 l 8151 2113 l 8125 2098 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+8489 2109 m 8467 1987 l 8543 2084 l 8488 1961 l 8433 1986 l cp
+clip
+n 8685 2475 m 8467 1987 l gs col0 s gr gr
+
+% arrowhead
+n 8489 2109 m 8467 1987 l 8543 2084 l 8516 2097 l 8489 2109 l cp gs col7 1.00 shd ef gr col0 s
+/Courier ff 180.00 scf sf
+3960 2250 m
+gs 1 -1 sc (x # delete) col0 sh gr
+% Polyline
+1 slj
+1 slc
+45.000 slw
+n 2595 2362 m 2820 2137 l gs col0 s gr
+% Polyline
+n 2595 2137 m 2820 2362 l gs col0 s gr
+% Polyline
+0 slj
+0 slc
+7.500 slw
+n 1575 1350 m 9225 1350 l 9225 4050 l 1575 4050 l cp gs col0 s gr
+/Courier-Bold ff 180.00 scf sf
+2640 2752 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+8280 1845 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+2655 1845 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6300 1845 m
+gs 1 -1 sc (y) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 1912 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 1912 a 357 x Fv(It)g(is)h(also)e(possible)h
+(to)f(mak)o(e)h(a)g(clone)e(of)i(a)g(subtree;)f(illustrated)g(in)h
+Fr(The)f(clone)g(of)h(a)f(subtr)m(ee)p Fv(.)h(In)f(this)h(case,)g(the)f
+(clone)396 2377 y(is)i(a)g(cop)o(y)e(of)h(the)g(original)f(subtree)h(e)
+o(xcept)f(that)h(it)h(is)h(no)d(longer)g(a)i(subnode.)d(Because)i
+(cloning)f(ne)n(v)o(er)g(k)o(eeps)h(the)396 2485 y(connection)e(to)j
+(the)f(parent,)f(the)h(clones)g(are)g(called)g Fr(orphaned)r
+Fv(.)396 2717 y Fu(Figur)o(e)g(3-5.)f(The)i(clone)f(of)g(a)g(subtr)o
+(ee)396 4050 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 4050 a @beginspecial 0 @llx 0 @lly
+388 @urx 138 @ury 3880 @rwi @setspecial
+%%BeginDocument: pic/node_clone.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_clone.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 388 138
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-78.0 205.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5062 m -1000 -1000 l 10237 -1000 l 10237 5062 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 2700 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2025 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3375 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6345 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5670 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7020 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8325 1800 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7875 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8775 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6345 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5895 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6795 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2700 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2250 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3150 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+1 slj
+60.000 slw
+n 4050 2610 m 4725 2610 l gs col0 s gr
+% Polyline
+n 4050 2745 m 4725 2745 l gs col0 s gr
+% Polyline
+1 slc
+n 4500 2385 m 4950 2655 l 4500 2970 l gs col0 s gr
+% Polyline
+0 slj
+0 slc
+7.500 slw
+gs clippath
+2125 2394 m 2025 2467 l 2078 2355 l 1992 2459 l 2039 2498 l cp
+clip
+n 2490 1905 m 2025 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2125 2394 m 2025 2467 l 2078 2355 l 2101 2375 l 2125 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+3158 2426 m 3202 2542 l 3109 2461 l 3186 2571 l 3235 2537 l cp
+clip
+n 2827 2002 m 3202 2542 l gs col0 s gr gr
+
+% arrowhead
+n 3158 2426 m 3202 2542 l 3109 2461 l 3134 2443 l 3158 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2436 2039 m 2535 1965 l 2482 2077 l 2568 1972 l 2521 1934 l cp
+clip
+n 2115 2475 m 2535 1965 l gs col0 s gr gr
+
+% arrowhead
+n 2436 2039 m 2535 1965 l 2482 2077 l 2459 2058 l 2436 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2916 2073 m 2872 1957 l 2965 2038 l 2888 1928 l 2839 1962 l cp
+clip
+n 3255 2505 m 2872 1957 l gs col0 s gr gr
+
+% arrowhead
+n 2916 2073 m 2872 1957 l 2965 2038 l 2941 2055 l 2916 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+5770 2394 m 5670 2467 l 5723 2355 l 5637 2459 l 5684 2498 l cp
+clip
+n 6135 1905 m 5670 2467 l gs col0 s gr gr
+
+% arrowhead
+n 5770 2394 m 5670 2467 l 5723 2355 l 5746 2375 l 5770 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6803 2426 m 6847 2542 l 6754 2461 l 6831 2571 l 6880 2537 l cp
+clip
+n 6472 2002 m 6847 2542 l gs col0 s gr gr
+
+% arrowhead
+n 6803 2426 m 6847 2542 l 6754 2461 l 6779 2443 l 6803 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6081 2039 m 6180 1965 l 6127 2077 l 6213 1972 l 6166 1934 l cp
+clip
+n 5760 2475 m 6180 1965 l gs col0 s gr gr
+
+% arrowhead
+n 6081 2039 m 6180 1965 l 6127 2077 l 6104 2058 l 6081 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6561 2073 m 6517 1957 l 6610 2038 l 6533 1928 l 6484 1962 l cp
+clip
+n 6900 2505 m 6517 1957 l gs col0 s gr gr
+
+% arrowhead
+n 6561 2073 m 6517 1957 l 6610 2038 l 6586 2055 l 6561 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+7947 2372 m 7860 2460 l 7896 2342 l 7827 2458 l 7878 2488 l cp
+clip
+n 8160 1957 m 7860 2460 l gs col0 s gr gr
+
+% arrowhead
+n 7947 2372 m 7860 2460 l 7896 2342 l 7921 2357 l 7947 2372 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8603 2398 m 8625 2520 l 8549 2423 l 8604 2546 l 8659 2521 l cp
+clip
+n 8407 2032 m 8625 2520 l gs col0 s gr gr
+
+% arrowhead
+n 8603 2398 m 8625 2520 l 8549 2423 l 8576 2410 l 8603 2398 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8125 2098 m 8212 2010 l 8177 2129 l 8245 2012 l 8194 1982 l cp
+clip
+n 7942 2467 m 8212 2010 l gs col0 s gr gr
+
+% arrowhead
+n 8125 2098 m 8212 2010 l 8177 2129 l 8151 2113 l 8125 2098 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+8489 2109 m 8467 1987 l 8543 2084 l 8488 1961 l 8433 1986 l cp
+clip
+n 8685 2475 m 8467 1987 l gs col0 s gr gr
+
+% arrowhead
+n 8489 2109 m 8467 1987 l 8543 2084 l 8516 2097 l 8489 2109 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6352 2152 m 6382 2032 l 6412 2152 l 6412 2017 l 6352 2017 l cp
+clip
+n 6382 2460 m 6382 2032 l gs col0 s gr gr
+
+% arrowhead
+n 6352 2152 m 6382 2032 l 6412 2152 l 6382 2152 l 6352 2152 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6337 2347 m 6307 2467 l 6277 2347 l 6277 2482 l 6337 2482 l cp
+clip
+n 6307 2032 m 6307 2467 l gs col0 s gr gr
+
+% arrowhead
+n 6337 2347 m 6307 2467 l 6277 2347 l 6307 2347 l 6337 2347 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+5967 3272 m 5880 3360 l 5916 3242 l 5847 3358 l 5898 3388 l cp
+clip
+n 6180 2857 m 5880 3360 l gs col0 s gr gr
+
+% arrowhead
+n 5967 3272 m 5880 3360 l 5916 3242 l 5941 3257 l 5967 3272 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6623 3298 m 6645 3420 l 6569 3323 l 6624 3446 l 6679 3421 l cp
+clip
+n 6427 2932 m 6645 3420 l gs col0 s gr gr
+
+% arrowhead
+n 6623 3298 m 6645 3420 l 6569 3323 l 6596 3310 l 6623 3298 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6145 2998 m 6232 2910 l 6197 3029 l 6265 2912 l 6214 2882 l cp
+clip
+n 5962 3367 m 6232 2910 l gs col0 s gr gr
+
+% arrowhead
+n 6145 2998 m 6232 2910 l 6197 3029 l 6171 3013 l 6145 2998 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6509 3009 m 6487 2887 l 6563 2984 l 6508 2861 l 6453 2886 l cp
+clip
+n 6705 3375 m 6487 2887 l gs col0 s gr gr
+
+% arrowhead
+n 6509 3009 m 6487 2887 l 6563 2984 l 6536 2997 l 6509 3009 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2707 2152 m 2737 2032 l 2767 2152 l 2767 2017 l 2707 2017 l cp
+clip
+n 2737 2460 m 2737 2032 l gs col0 s gr gr
+
+% arrowhead
+n 2707 2152 m 2737 2032 l 2767 2152 l 2737 2152 l 2707 2152 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2692 2347 m 2662 2467 l 2632 2347 l 2632 2482 l 2692 2482 l cp
+clip
+n 2662 2032 m 2662 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2692 2347 m 2662 2467 l 2632 2347 l 2662 2347 l 2692 2347 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2322 3272 m 2235 3360 l 2271 3242 l 2202 3358 l 2253 3388 l cp
+clip
+n 2535 2857 m 2235 3360 l gs col0 s gr gr
+
+% arrowhead
+n 2322 3272 m 2235 3360 l 2271 3242 l 2296 3257 l 2322 3272 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2978 3298 m 3000 3420 l 2924 3323 l 2979 3446 l 3034 3421 l cp
+clip
+n 2782 2932 m 3000 3420 l gs col0 s gr gr
+
+% arrowhead
+n 2978 3298 m 3000 3420 l 2924 3323 l 2951 3310 l 2978 3298 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2500 2998 m 2587 2910 l 2552 3029 l 2620 2912 l 2569 2882 l cp
+clip
+n 2317 3367 m 2587 2910 l gs col0 s gr gr
+
+% arrowhead
+n 2500 2998 m 2587 2910 l 2552 3029 l 2526 3013 l 2500 2998 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2864 3009 m 2842 2887 l 2918 2984 l 2863 2861 l 2808 2886 l cp
+clip
+n 3060 3375 m 2842 2887 l gs col0 s gr gr
+
+% arrowhead
+n 2864 3009 m 2842 2887 l 2918 2984 l 2891 2997 l 2864 3009 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+n 1575 1350 m 9225 1350 l 9225 4050 l 1575 4050 l cp gs col0 s gr
+/Courier-Bold ff 180.00 scf sf
+2655 1845 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6300 1845 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6285 2752 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+2640 2752 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier ff 180.00 scf sf
+3690 2025 m
+gs 1 -1 sc (let x' =) col0 sh gr
+/Courier ff 180.00 scf sf
+3690 2205 m
+gs 1 -1 sc (x # orphaned_clone) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+8235 1845 m
+gs 1 -1 sc (x') col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 4050 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 4050 a -2 4627 a Fp(3.2.2.)35
+b(The)f(methods)g(of)f(the)h(c)n(lass)h(type)f Fc(node)p
+Black 3800 5278 a Fr(52)p Black eop
+%%Page: 53 53
+53 52 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(General)g(obser)o(v)o
+(ers)g(.)p Black 396 866 a Ft(\225)p Black 60 w Fq(extension)p
+Fv(:)g(The)f(reference)g(to)h(the)h(e)o(xtension)d(object)i(which)g
+(belongs)f(to)h(this)h(node)e(\(see)h(...\).)p Black
+396 974 a Ft(\225)p Black 60 w Fq(dtd)p Fv(:)h(Returns)f(a)g(reference)
+f(to)h(the)g(global)g(DTD.)g(All)h(nodes)e(of)h(a)h(tree)f(must)g
+(share)g(the)g(same)h(DTD.)p Black 396 1082 a Ft(\225)p
+Black 60 w Fq(parent)p Fv(:)f(Get)h(the)f(f)o(ather)f(node.)g(Raises)j
+Fq(Not_found)d Fv(in)i(the)f(case)g(the)h(node)e(does)h(not)f(ha)n(v)o
+(e)h(a)h(parent,)e(i.e.)h(the)479 1190 y(node)f(is)j(the)e(root.)p
+Black 396 1298 a Ft(\225)p Black 60 w Fq(root)p Fv(:)g(Gets)h(the)g
+(reference)d(to)i(the)h(root)e(node)g(of)h(the)g(tree.)g(Ev)o(ery)f
+(node)g(is)i(contained)e(in)h(a)h(tree)f(with)h(a)f(root,)f(so)479
+1406 y(this)h(method)f(al)o(w)o(ays)h(succeeds.)e(Note)i(that)g(this)g
+(method)e Fr(sear)m(c)o(hes)h Fv(the)h(root,)e(which)h(costs)h(time)g
+(proportional)d(to)479 1514 y(the)j(length)g(of)g(the)g(path)g(to)g
+(the)g(root.)p Black 396 1622 a Ft(\225)p Black 60 w
+Fq(sub_nodes)p Fv(:)g(Returns)g(references)e(to)j(the)f(children.)f
+(The)g(returned)g(list)i(re\003ects)g(the)f(order)f(of)h(the)g
+(children.)e(F)o(or)479 1730 y(data)i(nodes,)g(this)g(method)f(returns)
+g(the)i(empty)e(list.)p Black 396 1838 a Ft(\225)p Black
+60 w Fq(iter_nodes)43 b(f)p Fv(:)21 b(Iterates)f(o)o(v)o(er)f(the)h
+(children,)f(and)g(calls)i Fq(f)g Fv(for)e(e)n(v)o(ery)g(child)h(in)g
+(turn.)p Black 396 1945 a Ft(\225)p Black 60 w Fq(iter_nodes_sibl)43
+b(f)p Fv(:)20 b(Iterates)g(o)o(v)o(er)f(the)h(children,)f(and)h(calls)g
+Fq(f)h Fv(for)f(e)n(v)o(ery)e(child)i(in)h(turn.)e Fq(f)h
+Fv(gets)h(as)479 2053 y(ar)o(guments)d(the)j(pre)n(vious)d(node,)h(the)
+h(current)f(node,)g(and)h(the)g(ne)o(xt)f(node.)p Black
+396 2161 a Ft(\225)p Black 60 w Fq(node_type)p Fv(:)h(Returns)g(either)
+f Fq(T_data)h Fv(which)g(means)g(that)g(the)g(node)f(is)i(a)g(data)f
+(node,)f(or)h Fq(T_element)43 b(n)479 2269 y Fv(which)20
+b(means)g(that)g(the)g(node)f(is)j(an)e(element)f(of)h(type)g
+Fq(n)p Fv(.)g(If)g(con\002gured,)e(possible)i(node)f(types)h(are)g
+(also)479 2377 y Fq(T_pinstr)44 b(t)20 b Fv(indicating)f(that)h(the)h
+(node)e(represents)g(a)i(processing)e(instruction)g(with)h(tar)o(get)f
+Fq(t)p Fv(,)i(and)479 2485 y Fq(T_comment)f Fv(in)g(which)g(case)g(the)
+g(node)g(is)h(a)f(comment.)p Black 396 2593 a Ft(\225)p
+Black 60 w Fq(encoding)p Fv(:)g(Returns)g(the)g(encoding)e(of)i(the)g
+(strings.)p Black 396 2701 a Ft(\225)p Black 60 w Fq(data)p
+Fv(:)g(Returns)g(the)h(character)e(data)h(of)g(this)g(node)f(and)h(all)
+h(children,)d(concatenated)h(as)i(one)e(string.)h(The)479
+2809 y(encoding)e(of)i(the)h(string)e(is)j(what)e(the)g(method)f
+Fq(encoding)g Fv(returns.)g(-)i(F)o(or)e(data)h(nodes,)g(this)g(method)
+f(simply)479 2917 y(returns)h(the)g(represented)e(characters.)h(F)o(or)
+h(elements,)g(the)g(meaning)f(of)g(the)i(method)d(has)j(been)e(e)o
+(xtended)g(such)479 3025 y(that)i(it)f(returns)g(something)e(useful,)i
+(i.e.)g(the)g(ef)n(fecti)n(v)o(ely)f(contained)f(characters,)h(without)
+h(markup.)e(\(F)o(or)479 3133 y Fq(T_pinstr)i Fv(and)f
+Fq(T_comment)h Fv(nodes,)f(the)h(method)f(returns)g(the)h(empty)g
+(string.\))p Black 396 3241 a Ft(\225)p Black 60 w Fq(position)p
+Fv(:)g(If)g(con\002gured,)d(this)k(method)e(returns)g(the)h(position)g
+(of)g(the)g(element)g(as)g(triple)g(\(entity)-5 b(,)19
+b(line,)479 3349 y(byteposition\).)f(F)o(or)i(data)g(nodes,)f(the)h
+(position)g(is)h(not)f(stored.)f(If)h(the)g(position)g(is)h(not)f(a)n
+(v)n(ailable)f(the)i(triple)f Fq("?",)479 3456 y(0,)45
+b(0)20 b Fv(is)h(returned.)p Black 396 3564 a Ft(\225)p
+Black 60 w Fq(comment)p Fv(:)f(Returns)g Fq(Some)44 b(text)20
+b Fv(for)f(comment)g(nodes,)g(and)g Fq(None)h Fv(for)g(other)f(nodes.)g
+(The)h Fq(text)f Fv(is)i(e)n(v)o(erything)479 3672 y(between)f(the)g
+(comment)f(delimiters)g Fo(<)p Fq(!--)i Fv(and)e Fq(--)p
+Fo(>)p Fv(.)p Black 396 3780 a Ft(\225)p Black 60 w Fq(pinstr)44
+b(n)p Fv(:)21 b(Returns)f(all)h(processing)d(instructions)i(that)g(are)
+g(directly)f(contained)g(in)h(this)h(element)e(and)h(that)g(ha)n(v)o(e)
+479 3888 y(a)h Fr(tar)m(g)o(et)h Fv(speci\002cation)d(of)h
+Fq(n)p Fv(.)g(The)g(tar)o(get)f(is)j(the)e(\002rst)h(w)o(ord)e(after)h
+(the)g Fo(<)p Fq(?)p Fv(.)p Black 396 3996 a Ft(\225)p
+Black 60 w Fq(pinstr_names)p Fv(:)f(Returns)h(the)g(list)i(of)e(all)g
+(tar)o(gets)g(of)g(processing)f(instructions)g(directly)g(contained)g
+(in)h(this)479 4104 y(element.)p Black 396 4212 a Ft(\225)p
+Black 60 w Fq(write)44 b(s)h(enc)p Fv(:)20 b(Prints)h(the)f(node)f(and)
+h(all)h(subnodes)d(to)j(the)f(passed)g(output)f(stream)h(as)h(v)n(alid)
+f(XML)g(te)o(xt,)g(using)479 4320 y(the)g(passed)h(e)o(xternal)e
+(encoding.)396 4511 y Fu(Attrib)n(ute)h(obser)o(v)o(ers)h(.)p
+Black 396 4743 a Ft(\225)p Black 60 w Fq(attribute)44
+b(n)p Fv(:)20 b(Returns)g(the)h(v)n(alue)e(of)h(the)g(attrib)n(ute)g
+(with)g(name)g Fq(n)p Fv(.)g(This)h(method)d(returns)i(a)g(v)n(alue)g
+(for)f(e)n(v)o(ery)479 4851 y(declared)g(attrib)n(ute,)h(and)f(it)i
+(raises)g Fq(Not_found)e Fv(for)h(an)o(y)f(undeclared)f(attrib)n(ute.)i
+(Note)g(that)g(it)h(e)n(v)o(en)e(returns)h(a)p Black
+3800 5278 a Fr(53)p Black eop
+%%Page: 54 54
+54 53 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 479 579 a Fv(v)n(alue)g(if)g(the)g
+(attrib)n(ute)g(is)h(actually)f(missing)g(b)n(ut)g(is)h(declared)e(as)i
+Fq(#IMPLIED)f Fv(or)g(has)g(a)h(def)o(ault)e(v)n(alue.)g(-)i(Possible)
+479 687 y(v)n(alues)f(are:)p Black 479 919 a Fa(\225)p
+Black 62 w Fq(Implied_value)p Fv(:)f(The)h(attrib)n(ute)g(has)g(been)g
+(declared)e(with)j(the)f(k)o(e)o(yw)o(ord)e Fq(#IMPLIED)p
+Fv(,)i(and)f(the)h(attrib)n(ute)g(is)562 1027 y(missing)g(in)h(the)f
+(attrib)n(ute)g(list)h(of)f(this)h(element.)p Black 479
+1135 a Fa(\225)p Black 62 w Fq(Value)44 b(s)p Fv(:)21
+b(The)f(attrib)n(ute)g(has)g(been)g(declared)e(as)j(type)f
+Fq(CDATA)p Fv(,)g(as)h Fq(ID)p Fv(,)f(as)h Fq(IDREF)p
+Fv(,)e(as)i Fq(ENTITY)p Fv(,)f(or)g(as)562 1243 y Fq(NMTOKEN)p
+Fv(,)g(or)g(as)g(enumeration)e(or)i(notation,)f(and)g(one)h(of)g(the)g
+(tw)o(o)h(conditions)d(holds:)i(\(1\))g(The)g(attrib)n(ute)562
+1351 y(v)n(alue)g(is)h(present)e(in)i(the)f(attrib)n(ute)g(list)h(in)f
+(which)g(case)h(the)f(v)n(alue)f(is)j(returned)c(in)i(the)h(string)e
+Fq(s)p Fv(.)i(\(2\))e(The)562 1459 y(attrib)n(ute)h(has)h(been)e
+(omitted,)g(and)h(the)g(DTD)g(declared)f(the)i(attrib)n(ute)e(with)i(a)
+f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)562 1567
+y(v)n(alue)f(is)i(returned)d(in)i Fq(s)p Fv(.)g(-)g(Summarized,)d
+Fq(Value)44 b(s)20 b Fv(is)h(returned)d(for)h(non-implied,)e(non-list)i
+(attrib)n(ute)g(v)n(alues.)p Black 479 1675 a Fa(\225)p
+Black 62 w Fq(Valuelist)44 b(l)p Fv(:)20 b(The)g(attrib)n(ute)g(has)g
+(been)g(declared)f(as)i(type)e Fq(IDREFS)p Fv(,)h(as)h
+Fq(ENTITIES)p Fv(,)e(or)h(as)h Fq(NMTOKENS)p Fv(,)562
+1783 y(and)f(one)g(of)f(the)i(tw)o(o)f(conditions)f(holds:)h(\(1\))f
+(The)h(attrib)n(ute)g(v)n(alue)f(is)i(present)f(in)g(the)h(attrib)n
+(ute)e(list)j(in)e(which)562 1891 y(case)h(the)f(space-separated)e(tok)
+o(ens)i(of)g(the)g(v)n(alue)g(are)g(returned)e(in)j(the)f(string)g
+(list)h Fq(l)p Fv(.)f(\(2\))g(The)g(attrib)n(ute)g(has)562
+1999 y(been)g(omitted,)f(and)h(the)g(DTD)g(declared)f(the)h(attrib)n
+(ute)g(with)h(a)f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)f(v)n(alue)
+h(is)h(returned)562 2107 y(in)g Fq(l)p Fv(.)f(-)g(Summarized,)f
+Fq(Valuelist)43 b(l)20 b Fv(is)i(returned)c(for)i(all)g(list-type)g
+(attrib)n(ute)g(v)n(alues.)396 2256 y(Note)g(that)h(before)d(the)j
+(attrib)n(ute)f(v)n(alue)f(is)i(returned,)d(the)i(v)n(alue)g(is)h
+(normalized.)d(This)j(means)e(that)i(ne)n(wlines)e(are)479
+2364 y(con)m(v)o(erted)f(to)i(spaces,)g(and)g(that)g(references)f(to)h
+(character)f(entities)i(\(i.e.)f Fq(&#)p Fn(n)p Fq(;)p
+Fv(\))g(and)f(general)g(entities)i(\(i.e.)479 2472 y
+Fq(&)p Fn(name)p Fq(;)p Fv(\))f(are)g(e)o(xpanded;)e(if)i(necessary)-5
+b(,)19 b(e)o(xpansion)f(is)j(performed)d(recursi)n(v)o(ely)-5
+b(.)479 2621 y(In)20 b(well-formedness)e(mode,)h(there)h(is)h(no)f(DTD)
+g(which)g(could)f(declare)h(an)g(attrib)n(ute.)f(Because)i(of)f(this,)g
+(e)n(v)o(ery)479 2729 y(occuring)f(attrib)n(ute)g(is)i(considered)e(as)
+i(a)f(CD)m(A)-9 b(T)h(A)21 b(attrib)n(ute.)p Black 396
+2879 a Ft(\225)p Black 60 w Fq(required_string_attribute)41
+b(n)p Fv(:)21 b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)
+g(n,)g(or)g(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479
+2987 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h
+(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h
+(if)h(the)479 3094 y(attrib)n(ute)f(does)g(not)g(e)o(xist,)g(the)g
+(method)f(will)i(f)o(ail.)g(-)f(This)g(method)f(is)i(con)m(v)o(enient)d
+(if)i(you)g(e)o(xpect)f(a)h(non-implied)479 3202 y(and)g(non-list)f
+(attrib)n(ute)h(v)n(alue.)p Black 396 3310 a Ft(\225)p
+Black 60 w Fq(optional_string_attribute)41 b(n)p Fv(:)21
+b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)g(n,)g(or)g
+(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479
+3418 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h
+(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h
+(if)h(the)479 3526 y(attrib)n(ute)f(does)g(not)g(e)o(xist,)g(the)g
+(method)f(returns)h(None.)f(-)h(This)h(method)e(is)i(con)m(v)o(enient)c
+(if)k(you)e(e)o(xpect)g(a)i(non-list)479 3634 y(attrib)n(ute)f(v)n
+(alue)g(including)e(the)i(implied)g(v)n(alue.)p Black
+396 3742 a Ft(\225)p Black 60 w Fq(required_list_attribute)41
+b(n)p Fv(:)20 b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f
+(called)g(n,)g(or)g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g
+(list)479 3850 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n
+(ute)g(v)n(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g
+(not)g(e)o(xist,)g(the)g(method)479 3958 y(will)h(f)o(ail.)g(-)f(This)
+g(method)f(is)i(con)m(v)o(enient)d(if)i(you)g(e)o(xpect)f(a)h(list)i
+(attrib)n(ute)d(v)n(alue.)p Black 396 4066 a Ft(\225)p
+Black 60 w Fq(optional_list_attribute)41 b(n)p Fv(:)20
+b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f(called)g(n,)g(or)
+g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g(list)479
+4174 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n(ute)g(v)n
+(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g(not)g(e)o
+(xist,)g(an)g(empty)g(list)479 4282 y(will)h(be)f(returned.)e(-)j
+(This)f(method)f(is)i(con)m(v)o(enient)d(if)i(you)f(e)o(xpect)h(a)g
+(list)i(attrib)n(ute)d(v)n(alue)h(or)g(the)g(implied)f(v)n(alue.)p
+Black 396 4390 a Ft(\225)p Black 60 w Fq(attribute_names)p
+Fv(:)g(returns)g(the)h(list)h(of)f(all)h(attrib)n(ute)f(names)g(of)g
+(this)g(element.)g(As)h(this)f(is)i(a)e(v)n(alidating)479
+4498 y(parser)m(,)f(this)i(list)g(is)g(equal)f(to)g(the)h(list)g(of)f
+(declared)f(attrib)n(utes.)p Black 396 4605 a Ft(\225)p
+Black 60 w Fq(attribute_type)43 b(n)p Fv(:)20 b(returns)g(the)g(type)g
+(of)g(the)g(attrib)n(ute)g(called)g Fq(n)p Fv(.)g(See)h(the)f(module)f
+Fq(Pxp_types)g Fv(for)g(a)479 4713 y(description)g(of)h(the)g(encoding)
+e(of)i(the)g(types.)p Black 396 4821 a Ft(\225)p Black
+60 w Fq(attributes)p Fv(:)f(returns)h(the)g(list)h(of)f(pairs)g(of)g
+(names)g(and)g(v)n(alues)g(for)f(all)i(attrib)n(utes)f(of)g(this)h
+(element.)p Black 3800 5278 a Fr(54)p Black eop
+%%Page: 55 55
+55 54 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p
+Black 60 w Fq(id_attribute_name)p Fv(:)e(returns)h(the)i(name)e(of)h
+(the)g(attrib)n(ute)g(that)g(is)h(declared)e(with)h(type)g(ID.)g(There)
+f(is)i(at)g(most)479 687 y(one)f(such)g(attrib)n(ute.)f(The)h(method)f
+(raises)i Fq(Not_found)e Fv(if)i(there)e(is)i(no)f(declared)f(ID)i
+(attrib)n(ute)e(for)h(the)g(element)479 795 y(type.)p
+Black 396 903 a Ft(\225)p Black 60 w Fq(id_attribute_value)p
+Fv(:)e(returns)h(the)i(v)n(alue)e(of)h(the)g(attrib)n(ute)g(that)g(is)h
+(declared)e(with)i(type)e(ID.)i(There)e(is)i(at)479 1011
+y(most)g(one)e(such)h(attrib)n(ute.)g(The)g(method)e(raises)j
+Fq(Not_found)e Fv(if)i(there)f(is)h(no)e(declared)g(ID)i(attrib)n(ute)f
+(for)f(the)479 1119 y(element)h(type.)p Black 396 1226
+a Ft(\225)p Black 60 w Fq(idref_attribute_names)p Fv(:)d(returns)h(the)
+h(list)i(of)e(attrib)n(ute)f(names)h(that)h(are)f(declared)f(as)i
+(IDREF)f(or)g(IDREFS.)396 1417 y Fu(Modifying)h(methods)h(.)f
+Fv(The)g(follo)n(wing)f(methods)g(are)h(only)f(de\002ned)g(for)h
+(element)f(nodes)h(\(more)f(e)o(xactly:)g(the)396 1525
+y(methods)g(are)i(de\002ned)e(for)g(data)h(nodes,)f(too,)h(b)n(ut)g(f)o
+(ail)h(al)o(w)o(ays\).)p Black 396 1758 a Ft(\225)p Black
+60 w Fq(add_node)44 b(sn)p Fv(:)20 b(Adds)g(sub)g(node)g
+Fq(sn)g Fv(to)g(the)g(list)i(of)e(children.)e(This)j(operation)d(is)j
+(illustrated)f(in)g(the)g(picture)g Fr(A)479 1866 y(node)f(can)h(only)g
+(be)g(added)f(if)h(it)h(is)h(a)e(r)l(oot)q Fv(.)g(This)h(method)e(e)o
+(xpects)g(that)h Fq(sn)h Fv(is)g(a)g(root,)e(and)g(it)i(requires)f
+(that)g Fq(sn)g Fv(and)479 1974 y(the)g(current)f(object)h(share)g(the)
+g(same)h(DTD.)479 2123 y(Because)g Fq(add_node)e Fv(is)i(the)f(method)f
+(the)h(parser)g(itself)h(uses)g(to)f(add)g(ne)n(w)g(nodes)f(to)h(the)h
+(tree,)e(it)i(performs)e(by)479 2231 y(def)o(ault)h(some)g(simple)g(v)n
+(alidation)f(checks:)g(If)h(the)h(content)e(model)g(is)i(a)g(re)o
+(gular)e(e)o(xpression,)f(it)j(is)g(not)f(allo)n(wed)f(to)479
+2339 y(add)h(data)g(nodes)f(to)i(this)g(node)e(unless)h(the)g(ne)n(w)g
+(nodes)g(consist)g(only)f(of)h(whitespace.)g(In)g(this)g(case,)h(the)f
+(ne)n(w)g(data)479 2447 y(nodes)g(are)g(silently)g(dropped)e(\(you)h
+(can)h(change)f(this)h(by)g(in)m(v)n(oking)e Fq
+(keep_always_whitespace_mode)p Fv(\).)479 2596 y(If)i(the)h(document)d
+(is)j(\003agged)e(as)i(stand-alone,)d(these)j(data)f(nodes)f(only)g
+(containing)g(whitespace)g(are)h(e)n(v)o(en)479 2704
+y(forbidden)e(if)i(the)h(element)e(declaration)g(is)i(contained)d(in)j
+(an)f(e)o(xternal)f(entity)-5 b(.)19 b(This)h(case)h(is)g(detected)f
+(and)479 2812 y(rejected.)479 2962 y(If)g(the)h(content)e(model)g(is)i
+Fq(EMPTY)p Fv(,)f(it)h(is)g(not)f(allo)n(wed)f(to)i(add)e(an)o(y)h
+(data)g(node)f(unless)h(the)g(data)g(node)g(is)h(empty)-5
+b(.)18 b(In)479 3070 y(this)j(case,)f(the)h(ne)n(w)f(data)g(node)f(is)i
+(silently)f(dropped.)479 3219 y(These)g(checks)g(only)f(apply)h(if)g
+(there)g(is)h(a)f(DTD.)h(In)f(well-formedness)e(mode,)h(it)i(is)g
+(assumed)e(that)i(e)n(v)o(ery)d(element)479 3327 y(is)j(declared)e
+(with)i(content)e(model)g Fq(ANY)h Fv(which)g(prohibits)f(an)o(y)g(v)n
+(alidation)g(check.)g(Furthermore,)f(you)h(turn)h(these)479
+3435 y(checks)g(of)n(f)f(by)h(passing)g Fq(~force:true)f
+Fv(as)i(\002rst)g(ar)o(gument.)p Black 396 3584 a Ft(\225)p
+Black 60 w Fq(add_pinstr)43 b(pi)p Fv(:)21 b(Adds)f(the)g(processing)f
+(instruction)g Fq(pi)h Fv(to)h(the)f(list)h(of)f(processing)f
+(instructions.)p Black 396 3692 a Ft(\225)p Black 60
+w Fq(delete)p Fv(:)h(Deletes)h(this)g(node)e(from)g(the)h(tree.)g
+(After)g(this)h(operation,)d(this)i(node)g(is)h(no)f(longer)e(the)j
+(child)e(of)h(the)479 3800 y(former)f(f)o(ather)g(node;)f(and)i(the)g
+(node)e(loses)j(the)e(connection)f(to)i(the)g(f)o(ather)f(as)h(well.)h
+(This)e(operation)f(is)j(illustrated)479 3908 y(by)f(the)g(\002gure)g
+Fr(A)g(deleted)g(node)f(becomes)g(the)i(r)l(oot)f(of)g(the)h(subtr)m
+(ee)p Fv(.)p Black 396 4016 a Ft(\225)p Black 60 w Fq(set_nodes)44
+b(nl)p Fv(:)20 b(Sets)h(the)f(list)i(of)e(children)e(to)j
+Fq(nl)p Fv(.)f(It)g(is)i(required)c(that)i(e)n(v)o(ery)f(member)g(of)h
+Fq(nl)g Fv(is)h(a)g(root,)e(and)479 4124 y(that)i(all)f(members)f(and)h
+(the)g(current)f(object)h(share)g(the)g(same)g(DTD.)g(Unlik)o(e)g
+Fq(add_node)p Fv(,)g(no)f(v)n(alidation)g(checks)479
+4232 y(are)h(performed.)p Black 396 4340 a Ft(\225)p
+Black 60 w Fq(quick_set_attributes)42 b(atts)p Fv(:)20
+b(sets)h(the)f(attrib)n(utes)h(of)e(this)i(element)f(to)g
+Fq(atts)p Fv(.)g(It)g(is)i Fr(not)f Fv(check)o(ed)479
+4448 y(whether)e Fq(atts)i Fv(matches)e(the)i(DTD)f(or)g(not;)g(it)h
+(is)g(up)f(to)g(the)g(caller)g(of)g(this)h(method)e(to)h(ensure)g
+(this.)g(\(This)479 4556 y(method)f(may)h(be)g(useful)g(to)g(transform)
+e(the)j(attrib)n(ute)f(v)n(alues,)f(i.e.)h(apply)f(a)i(mapping)d(to)j
+(e)n(v)o(ery)e(attrib)n(ute.\))p Black 396 4664 a Ft(\225)p
+Black 60 w Fq(set_comment)43 b(text)p Fv(:)20 b(This)h(method)e(is)i
+(only)e(applicable)g(to)h Fq(T_comment)g Fv(nodes;)f(it)i(sets)g(the)g
+(comment)d(te)o(xt)479 4772 y(contained)h(by)h(such)g(nodes.)p
+Black 3800 5278 a Fr(55)p Black eop
+%%Page: 56 56
+56 55 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Cloning)g(methods)h(.)
+p Black 396 811 a Ft(\225)p Black 60 w Fq(orphaned_clone)p
+Fv(:)e(Returns)h(a)g(clone)g(of)g(the)g(node)f(and)h(the)g(complete)f
+(tree)h(belo)n(w)g(this)h(node)e(\(deep)g(clone\).)479
+919 y(The)h(clone)g(does)g(not)g(ha)n(v)o(e)f(a)i(parent)e(\(i.e.)h
+(the)g(reference)f(to)h(the)g(parent)f(node)g(is)j Fr(not)f
+Fv(cloned\).)d(While)j(cop)o(ying)479 1027 y(the)f(subtree,)g(strings)g
+(are)g(skipped;)f(it)i(is)g(lik)o(ely)f(that)h(the)f(original)f(tree)h
+(and)g(the)g(cop)o(y)f(tree)h(share)g(strings.)479 1135
+y(Extension)f(objects)h(are)g(cloned)f(by)h(in)m(v)n(oking)e(the)i
+Fq(clone)g Fv(method)f(on)h(the)g(original)f(objects;)h(ho)n(w)g(much)f
+(of)h(the)479 1243 y(e)o(xtension)f(objects)h(is)h(cloned)e(depends)g
+(on)h(the)g(implementation)e(of)i(this)h(method.)479 1393
+y(This)g(operation)d(is)j(illustrated)f(by)g(the)g(\002gure)f
+Fr(The)i(clone)e(of)i(a)f(subtr)m(ee)p Fv(.)p Black 396
+1542 a Ft(\225)p Black 60 w Fq(orphaned_flat_clone)p
+Fv(:)e(Returns)i(a)h(clone)e(of)h(the)g(node,)f(b)n(ut)h(sets)i(the)e
+(list)h(of)f(sub)g(nodes)g(to)g([],)g(i.e.)g(the)g(sub)479
+1650 y(nodes)g(are)g(not)g(cloned.)p Black 396 1758 a
+Ft(\225)p Black 81 w Fq(create_element)42 b(dtd)i(nt)h(al)p
+Fv(:)20 b(Returns)f(a)i(\003at)f(cop)o(y)f(of)g(this)i(node)d(\(which)h
+(must)h(be)f(an)h(element\))f(with)h(the)479 1866 y(follo)n(wing)f
+(modi\002cations:)g(The)h(DTD)g(is)h(set)g(to)f Fq(dtd)p
+Fv(;)h(the)f(node)f(type)h(is)h(set)g(to)f Fq(nt)p Fv(,)g(and)g(the)g
+(ne)n(w)g(attrib)n(ute)g(list)h(is)479 1974 y(set)g(to)f
+Fq(al)g Fv(\(gi)n(v)o(en)e(as)i(list)h(of)f(\(name,v)n(alue\))d
+(pairs\).)i(The)g(cop)o(y)g(does)h(not)f(ha)n(v)o(e)g(children)g(nor)g
+(a)h(parent.)f(It)h(does)f(not)479 2082 y(contain)g(processing)g
+(instructions.)g(See)i(the)f(e)o(xample)f(belo)n(w.)479
+2231 y(Note)h(that)h(you)e(can)h(specify)g(the)g(position)f(of)h(the)g
+(ne)n(w)g(node)f(by)h(the)g(optional)f(ar)o(gument)f
+Fq(~position)p Fv(.)p Black 396 2380 a Ft(\225)p Black
+81 w Fq(create_data)43 b(dtd)h(cdata)p Fv(:)20 b(Returns)g(a)h(\003at)g
+(cop)o(y)e(of)h(this)h(node)e(\(which)g(must)h(be)h(a)f(data)g(node\))f
+(with)h(the)479 2488 y(follo)n(wing)f(modi\002cations:)g(The)h(DTD)g
+(is)h(set)g(to)f Fq(dtd)p Fv(;)h(the)f(node)f(type)h(is)h(set)g(to)f
+Fq(T_data)p Fv(;)g(the)g(attrib)n(ute)g(list)h(is)479
+2596 y(empty)f(\(data)f(nodes)h(ne)n(v)o(er)f(ha)n(v)o(e)g(attrib)n
+(utes\);)h(the)g(list)h(of)f(children)f(and)h(PIs)h(is)g(empty)-5
+b(,)19 b(too)g(\(same)h(reason\).)f(The)479 2704 y(ne)n(w)h(node)f
+(does)h(not)g(ha)n(v)o(e)g(a)g(parent.)f(The)h(v)n(alue)g
+Fq(cdata)g Fv(is)h(the)f(ne)n(w)g(character)f(content)g(of)h(the)g
+(node.)f(See)i(the)479 2812 y(e)o(xample)e(belo)n(w.)p
+Black 396 2920 a Ft(\225)p Black 60 w Fq(keep_always_whitespace_mode)p
+Fv(:)e(Ev)o(en)i(data)h(nodes)f(which)h(are)g(normally)f(dropped)e
+(because)j(the)o(y)f(only)479 3028 y(contain)g(ignorable)f(whitespace,)
+h(can)h(added)e(to)i(this)h(node)d(once)h(this)i(mode)e(is)h(turned)f
+(on.)g(\(This)h(mode)f(is)h(useful)479 3136 y(to)h(produce)d(canonical)
+h(XML.\))396 3327 y Fu(V)-8 b(alidating)20 b(methods)h(.)f
+Fv(There)f(is)j(one)d(method)g(which)h(locally)f(v)n(alidates)h(the)g
+(node,)f(i.e.)i(checks)e(whether)g(the)396 3435 y(subnodes)g(match)h
+(the)g(content)f(model)g(of)h(this)h(node.)p Black 396
+3667 a Ft(\225)p Black 60 w Fq(local_validate)p Fv(:)e(Checks)h(that)g
+(this)h(node)e(conforms)f(to)j(the)f(DTD)g(by)g(comparing)e(the)i(type)
+g(of)g(the)479 3775 y(subnodes)e(with)i(the)g(content)e(model)h(for)g
+(this)h(node.)e(\(Applications)g(need)h(not)g(call)h(this)h(method)d
+(unless)h(the)o(y)g(add)479 3883 y(ne)n(w)h(nodes)g(themselv)o(es)f(to)
+i(the)f(tree.\))-2 4294 y Fp(3.2.3.)35 b(The)f(c)n(lass)h
+Fc(element_impl)396 4462 y Fv(This)21 b(class)g(is)g(an)f
+(implementation)e(of)i Fq(node)g Fv(which)g(realizes)g(element)g
+(nodes:)396 4642 y Fq(class)44 b([)h('ext)f(])h(element_impl)e(:)h
+('ext)g(->)h([)g('ext)f(])g(node)396 4875 y Fu(Constructor)-8
+b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)p
+Black 3798 5278 a Fr(56)p Black eop
+%%Page: 57 57
+57 56 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(new)45
+b(element_impl)d Fn(extension_object)396 770 y Fv(which)20
+b(creates)g(a)h(special)f(form)f(of)h(empty)f(element)h(which)g
+(already)f(contains)g(a)i(reference)d(to)j(the)396 878
+y Fl(extension_object)p Fv(,)d(b)n(ut)i(is)h(otherwise)f(empty)-5
+b(.)18 b(This)j(special)f(form)f(is)i(called)f(an)g Fr(e)n(xemplar)r
+Fv(.)g(The)g(purpose)f(of)396 986 y(e)o(x)o(emplars)g(is)i(that)f(the)o
+(y)g(serv)o(e)f(as)i(patterns)f(that)g(can)g(be)g(duplicated)f(and)g
+(\002lled)i(with)f(data.)g(The)g(method)396 1094 y Fq(create_element)f
+Fv(is)i(designed)e(to)h(perform)e(this)j(action.)396
+1243 y Fu(Example.)f Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396
+1423 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 1520
+y(let)h(exemplar)222 b(=)45 b(new)f(element_impl)f(exemplar_ext)g(in)
+396 1711 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e
+(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(element)g
+(nodes)f(are)i(created:)396 1891 y Fq(let)45 b(element)e(=)i(exemplar)e
+(#)i(create_element)e(dtd)h(\(T_element)f(name\))h(attlist)396
+2082 y Fv(The)20 b Fq(element)g Fv(is)h(a)f(cop)o(y)g(of)g
+Fq(exemplar)f Fv(\(e)n(v)o(en)g(the)h(e)o(xtension)f
+Fq(exemplar_ext)g Fv(has)h(been)g(copied\))e(which)396
+2190 y(ensures)h(that)h Fq(element)f Fv(and)g(its)i(e)o(xtension)d(are)
+i(objects)f(of)h(the)f(same)h(class)h(as)f(the)g(e)o(x)o(emplars;)e
+(note)h(that)h(you)e(need)396 2298 y(not)i(to)g(pass)h(a)g(class)g
+(name)f(or)f(other)h(meta)g(information.)d(The)j(cop)o(y)g(is)h
+(initially)f(connected)e(with)j(the)f Fq(dtd)p Fv(,)g(it)h(gets)f(a)396
+2406 y(node)f(type,)h(and)g(the)g(attrib)n(ute)g(list)h(is)g(\002lled.)
+f(The)g Fq(element)g Fv(is)h(no)n(w)e(fully)h(functional;)e(it)j(can)f
+(be)g(added)f(to)i(another)396 2514 y(element)f(as)h(child,)e(and)h(it)
+h(can)f(contain)f(references)g(to)h(subnodes.)-2 2884
+y Fp(3.2.4.)35 b(The)f(c)n(lass)h Fc(data_impl)396 3051
+y Fv(This)21 b(class)g(is)g(an)f(implementation)e(of)i
+Fq(node)g Fv(which)g(should)f(be)h(used)g(for)f(all)i(character)e(data)
+h(nodes:)396 3232 y Fq(class)44 b([)h('ext)f(])h(data_impl)e(:)i('ext)f
+(->)g([)h('ext)f(])h(node)396 3464 y Fu(Constructor)-8
+b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)
+396 3644 y Fq(new)45 b(data_impl)e Fn(extension_object)396
+3835 y Fv(which)20 b(creates)g(an)g(empty)g(e)o(x)o(emplar)e(node)h
+(which)h(is)h(connected)d(to)i Fl(extension_object)p
+Fv(.)e(The)i(node)f(does)396 3943 y(not)h(contain)f(a)i(reference)d(to)
+j(an)o(y)e(DTD,)h(and)g(because)f(of)h(this)h(it)g(cannot)e(be)h(added)
+f(to)i(node)e(trees.)396 4093 y(T)-7 b(o)21 b(get)f(a)g(fully)g(w)o
+(orking)f(data)h(node,)f(apply)g(the)h(method)f Fq(create_data)g
+Fv(to)h(the)g(e)o(x)o(emplar)f(\(see)h(e)o(xample\).)396
+4242 y Fu(Example.)g Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396
+4422 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 4519
+y(let)h(exemplar)222 b(=)45 b(new)f(data_impl)f(exemplar_ext)h(in)396
+4710 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e
+(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(data)g(nodes)
+g(are)g(created:)p Black 3797 5278 a Fr(57)p Black eop
+%%Page: 58 58
+58 57 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(let)45
+b(data_node)e(=)i(exemplar)e(#)i(create_data)e(dtd)h("The)g(characters)
+f(con-)396 676 y(tained)h(in)h(the)f(data)g(node")396
+867 y Fv(The)20 b Fq(data_node)f Fv(is)i(a)g(cop)o(y)e(of)h
+Fq(exemplar)p Fv(.)g(The)f(cop)o(y)h(is)h(initially)f(connected)e(with)
+j(the)f Fq(dtd)p Fv(,)g(and)f(it)i(is)h(\002lled)396
+975 y(with)f(character)e(material.)g(The)h Fq(data_node)f
+Fv(is)i(no)n(w)f(fully)g(functional;)e(it)j(can)f(be)g(added)f(to)h(an)
+h(element)e(as)i(child.)-2 1345 y Fp(3.2.5.)35 b(The)f(type)g
+Fc(spec)396 1512 y Fv(The)20 b(type)g Fq(spec)g Fv(de\002nes)g(a)g(w)o
+(ay)h(to)f(handle)f(the)h(details)h(of)f(creating)f(nodes)g(from)h(e)o
+(x)o(emplars.)396 1692 y Fq(type)44 b('ext)h(spec)396
+1790 y(constraint)e('ext)i(=)f('ext)g(node)h(#extension)396
+1984 y(val)g(make_spec_from_mapping)c(:)665 2081 y
+(?super_root_exemplar)h(:)i('ext)h(node)f(->)665 2178
+y(?comment_exemplar)e(:)j('ext)f(node)g(->)665 2275 y
+(?default_pinstr_exemplar)d(:)k('ext)f(node)g(->)665
+2372 y(?pinstr_mapping)f(:)h(\(string,)g('ext)g(node\))g(Hashtbl.t)f
+(->)665 2469 y(data_exemplar:)g('ext)h(node)g(->)665
+2567 y(default_element_exemplar:)d('ext)j(node)g(->)665
+2664 y(element_mapping:)e(\(string,)i('ext)g(node\))g(Hashtbl.t)f(->)
+665 2761 y(unit)h(->)755 2858 y('ext)g(spec)396 3052
+y(val)h(make_spec_from_alist)c(:)665 3149 y(?super_root_exemplar)h(:)i
+('ext)h(node)f(->)665 3247 y(?comment_exemplar)e(:)j('ext)f(node)g(->)
+665 3344 y(?default_pinstr_exemplar)d(:)k('ext)f(node)g(->)665
+3441 y(?pinstr_alist)f(:)i(\(string)e(*)i('ext)f(node\))g(list)g(->)665
+3538 y(data_exemplar:)f('ext)h(node)g(->)665 3635 y
+(default_element_exemplar:)d('ext)j(node)g(->)665 3732
+y(element_alist:)f(\(string)g(*)i('ext)f(node\))g(list)g(->)665
+3829 y(unit)g(->)755 3927 y('ext)g(spec)396 4117 y Fv(The)20
+b(tw)o(o)h(functions)d Fq(make_spec_from_mapping)f Fv(and)j
+Fq(make_spec_from_alist)d Fv(create)j Fq(spec)g Fv(v)n(alues.)396
+4225 y(Both)g(functions)f(are)h(functionally)e(equi)n(v)n(alent)h(and)g
+(the)i(only)e(dif)n(ference)f(is)j(that)g(the)f(\002rst)h(function)d
+(prefers)396 4333 y(hashtables)i(and)g(the)g(latter)g(associati)n(v)o
+(e)g(lists)h(to)g(describe)e(mappings)g(from)g(names)h(to)g(e)o(x)o
+(emplars.)396 4483 y(Y)-9 b(ou)20 b(can)g(specify)f(e)o(x)o(emplars)g
+(for)g(the)i(v)n(arious)e(kinds)g(of)h(nodes)g(that)g(need)g(to)g(be)g
+(generated)e(when)i(an)g(XML)396 4591 y(document)e(is)k(parsed:)p
+Black 3800 5278 a Fr(58)p Black eop
+%%Page: 59 59
+59 58 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p
+Black 60 w Fq(~super_root_exemplar)p Fv(:)e(This)i(e)o(x)o(emplar)e(is)
+j(used)f(to)h(create)f(the)g(super)f(root.)h(This)g(special)g(node)g
+(is)h(only)479 687 y(created)f(if)g(the)g(corresponding)d
+(con\002guration)h(option)h(has)h(been)g(selected;)g(it)h(is)g(the)f
+(parent)f(node)g(of)h(the)h(root)479 795 y(node)e(which)h(may)g(be)g
+(con)m(v)o(enient)d(if)k(e)n(v)o(ery)e(w)o(orking)f(node)i(must)g(ha)n
+(v)o(e)f(a)i(parent.)p Black 396 903 a Ft(\225)p Black
+60 w Fq(~comment_exemplar)p Fv(:)d(This)j(e)o(x)o(emplar)d(is)j(used)f
+(when)f(a)i(comment)e(node)g(must)h(be)g(created.)g(Note)g(that)g(such)
+479 1011 y(nodes)g(are)g(only)f(created)h(if)g(the)g(corresponding)d
+(con\002guration)h(option)h(is)i("on".)p Black 396 1119
+a Ft(\225)p Black 60 w Fq(~default_pinstr_exemplar)p
+Fv(:)c(If)j(a)h(node)e(for)g(a)i(processing)e(instruction)g(must)h(be)g
+(created,)f(and)h(the)479 1226 y(instruction)f(is)i(not)f(listed)h(in)f
+(the)g(table)h(passed)f(by)f Fq(~pinstr_mapping)g Fv(or)h
+Fq(~pinstr_alist)p Fv(,)e(this)j(e)o(x)o(emplar)479 1334
+y(is)g(used.)f(Again)f(the)i(con\002guration)c(option)i(must)h(be)g
+("on")g(in)g(order)f(to)i(create)e(such)h(nodes)g(at)h(all.)p
+Black 396 1442 a Ft(\225)p Black 60 w Fq(~pinstr_mapping)e
+Fv(or)g Fq(~pinstr_alist)p Fv(:)g(Map)h(the)g(tar)o(get)g(names)f(of)h
+(processing)f(instructions)g(to)479 1550 y(e)o(x)o(emplars.)g(These)h
+(mappings)e(are)i(only)g(used)g(when)f(nodes)h(for)f(processing)g
+(instructions)g(are)h(created.)p Black 396 1658 a Ft(\225)p
+Black 60 w Fq(~data_exemplar)p Fv(:)f(The)h(e)o(x)o(emplar)e(for)h
+(ordinary)f(data)i(nodes.)p Black 396 1766 a Ft(\225)p
+Black 60 w Fq(~default_element_exemplar)p Fv(:)d(This)j(e)o(x)o(emplar)
+e(is)k(used)e(if)g(an)g(element)g(node)f(must)h(be)g(created,)f(b)n(ut)
+i(the)479 1874 y(element)f(type)g(cannot)f(be)h(found)e(in)j(the)f
+(tables)g Fq(element_mapping)e Fv(or)i Fq(element_alist)p
+Fv(.)p Black 396 1982 a Ft(\225)p Black 60 w Fq(~element_mapping)e
+Fv(or)i Fq(~element_alist)p Fv(:)f(Map)h(the)g(element)f(types)h(to)h
+(e)o(x)o(emplars.)d(These)i(mappings)f(are)479 2090 y(used)h(to)h
+(create)e(element)h(nodes.)396 2239 y(In)g(most)g(cases,)h(you)e(only)h
+(w)o(ant)g(to)g(create)g Fq(spec)g Fv(v)n(alues)g(to)h(pass)f(them)g
+(to)g(the)h(parser)e(functions)g(found)f(in)396 2347
+y Fq(Pxp_yacc)p Fv(.)h(Ho)n(we)n(v)o(er)m(,)f(it)j(might)f(be)g(useful)
+g(to)g(apply)f Fq(spec)h Fv(v)n(alues)g(directly)-5 b(.)396
+2497 y(The)20 b(follo)n(wing)f(functions)f(create)i(v)n(arious)f(types)
+h(of)g(nodes)g(by)g(selecting)f(the)i(corresponding)16
+b(e)o(x)o(emplar)j(from)g(the)396 2605 y(passed)h Fq(spec)g
+Fv(v)n(alue,)g(and)f(by)h(calling)g Fq(create_element)e
+Fv(or)i Fq(create_data)f Fv(on)h(the)g(e)o(x)o(emplar)-5
+b(.)396 2785 y Fq(val)45 b(create_data_node)d(:)665 2882
+y('ext)i(spec)h(->)665 2979 y(dtd)g(->)665 3076 y(\(*)g(data)f
+(material:)f(*\))i(string)f(->)845 3173 y('ext)g(node)396
+3368 y(val)h(create_element_node)c(:)665 3465 y(?position:\(string)h(*)
+j(int)f(*)h(int\))f(->)665 3562 y('ext)g(spec)h(->)665
+3659 y(dtd)g(->)665 3756 y(\(*)g(element)e(type:)h(*\))h(string)f(->)
+665 3853 y(\(*)h(attributes:)e(*\))h(\(string)g(*)h(string\))e(list)h
+(->)845 3950 y('ext)g(node)396 4145 y(val)h(create_super_root_node)c(:)
+665 4242 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)665
+4339 y('ext)g(spec)h(->)710 4436 y(dtd)f(->)889 4533
+y('ext)h(node)396 4728 y(val)g(create_comment_node)c(:)665
+4825 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)p
+Black 3800 5278 a Fr(59)p Black eop
+%%Page: 60 60
+60 59 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 665 579 a Fq('ext)44
+b(spec)h(->)665 676 y(dtd)g(->)665 773 y(\(*)g(comment)e(text:)h(*\))h
+(string)f(->)845 870 y('ext)g(node)396 1065 y(val)h(create_pinstr_node)
+c(:)665 1162 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)665
+1259 y('ext)g(spec)h(->)665 1356 y(dtd)g(->)665 1453
+y(proc_instruction)d(->)845 1550 y('ext)i(node)-2 2003
+y Fp(3.2.6.)35 b(Examples)396 2171 y Fu(Building)22 b(tr)o(ees.)d
+Fv(Here)h(is)h(the)g(piece)e(of)h(code)g(that)g(creates)g(the)h(tree)f
+(of)g(the)g(\002gure)f Fr(A)i(tr)m(ee)g(with)f(element)g(nodes,)396
+2279 y(data)g(nodes,)f(and)g(attrib)n(utes)p Fv(.)h(The)g(e)o(xtension)
+f(object)h(and)f(the)h(DTD)h(are)f(be)o(yond)e(the)i(scope)g(of)g(this)
+g(e)o(xample.)396 2459 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(\(*)h
+(some)f(extension)f(*\))i(in)396 2556 y(let)g(dtd)f(=)h(...)f(\(*)g
+(some)h(DTD)f(*\))g(in)396 2750 y(let)h(element_exemplar)d(=)i(new)h
+(element_impl)e(exemplar_ext)f(in)396 2847 y(let)j(data_exemplar)177
+b(=)44 b(new)h(data_impl)178 b(exemplar_ext)42 b(in)396
+3042 y(let)j(a1)f(=)h(element_exemplar)d(#)j(cre-)396
+3139 y(ate_element)e(dtd)i(\(T_element)e("a"\))h(["att",)g("apple"])396
+3236 y(and)h(b1)f(=)h(element_exemplar)d(#)j(create_element)d(dtd)i
+(\(T_element)g("b"\))g([])396 3333 y(and)h(c1)f(=)h(element_exemplar)d
+(#)j(create_element)d(dtd)i(\(T_element)g("c"\))g([])396
+3430 y(and)h(a2)f(=)h(element_exemplar)d(#)j(cre-)396
+3527 y(ate_element)e(dtd)i(\(T_element)e("a"\))h(["att",)g("orange"])
+396 3624 y(in)396 3819 y(let)h(cherries)e(=)i(data_exemplar)d(#)j
+(create_data)e(dtd)h("Cherries")g(in)396 3916 y(let)h(orange)133
+b(=)45 b(data_exemplar)d(#)j(create_data)e(dtd)h("An)h(orange")e(in)396
+4110 y(a1)i(#)f(add_node)g(b1;)396 4207 y(a1)h(#)f(add_node)g(c1;)396
+4304 y(b1)h(#)f(add_node)g(a2;)396 4401 y(b1)h(#)f(add_node)g
+(cherries;)396 4499 y(a2)h(#)f(add_node)g(orange;)396
+4689 y Fv(Alternati)n(v)o(ely)-5 b(,)18 b(the)i(last)h(block)f(of)g
+(statements)g(could)f(also)i(be)f(written)g(as:)396 4870
+y Fq(a1)45 b(#)f(set_nodes)g([b1;)g(c1];)p Black 3800
+5278 a Fr(60)p Black eop
+%%Page: 61 61
+61 60 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(b1)45
+b(#)f(set_nodes)g([a2;)g(cherries];)396 676 y(a2)h(#)f(set_nodes)g
+([orange];)396 867 y Fv(The)20 b(root)g(of)g(the)g(tree)g(is)h
+Fq(a1)p Fv(,)f(i.e.)g(it)h(is)g(true)f(that)396 1047
+y Fq(x)45 b(#)g(root)f(==)g(a1)396 1238 y Fv(for)20 b(e)n(v)o(ery)f(x)h
+(from)f({)i Fq(a1)p Fv(,)f Fq(a2)p Fv(,)g Fq(b1)p Fv(,)g
+Fq(c1)p Fv(,)g Fq(cherries)p Fv(,)g Fq(orange)f Fv(}.)396
+1388 y(Furthermore,)f(the)i(follo)n(wing)f(properties)f(hold:)486
+1568 y Fq(a1)44 b(#)h(attribute)e("att")h(=)h(Value)f("apple")396
+1665 y(&)h(a2)f(#)h(attribute)e("att")h(=)h(Value)f("orange")396
+1859 y(&)h(cherries)e(#)i(data)f(=)h("Cherries")396 1956
+y(&)135 b(orange)43 b(#)i(data)f(=)h("An)f(orange")396
+2053 y(&)314 b(a1)44 b(#)h(data)f(=)h("CherriesAn)e(orange")396
+2248 y(&)314 b(a1)44 b(#)h(node_type)e(=)i(T_element)e("a")396
+2345 y(&)314 b(a2)44 b(#)h(node_type)e(=)i(T_element)e("a")396
+2442 y(&)314 b(b1)44 b(#)h(node_type)e(=)i(T_element)e("b")396
+2539 y(&)314 b(c1)44 b(#)h(node_type)e(=)i(T_element)e("c")396
+2636 y(&)i(cherries)e(#)i(node_type)e(=)i(T_data)396
+2733 y(&)135 b(orange)43 b(#)i(node_type)e(=)i(T_data)396
+2928 y(&)314 b(a1)44 b(#)h(sub_nodes)e(=)i([)g(b1;)f(c1)h(])396
+3025 y(&)314 b(a2)44 b(#)h(sub_nodes)e(=)i([)g(orange)f(])396
+3122 y(&)314 b(b1)44 b(#)h(sub_nodes)e(=)i([)g(a2;)f(cherries)g(])396
+3219 y(&)314 b(c1)44 b(#)h(sub_nodes)e(=)i([])396 3316
+y(&)g(cherries)e(#)i(sub_nodes)e(=)i([])396 3413 y(&)135
+b(orange)43 b(#)i(sub_nodes)e(=)i([])396 3608 y(&)314
+b(a2)44 b(#)h(parent)f(==)g(a1)396 3705 y(&)314 b(b1)44
+b(#)h(parent)f(==)g(b1)396 3802 y(&)314 b(c1)44 b(#)h(parent)f(==)g(a1)
+396 3899 y(&)h(cherries)e(#)i(parent)f(==)g(b1)396 3996
+y(&)135 b(orange)43 b(#)i(parent)f(==)g(a2)396 4229 y
+Fu(Sear)o(ching)19 b(nodes.)g Fv(The)g(follo)n(wing)e(function)h
+(searches)h(all)g(nodes)g(of)g(a)g(tree)h(for)e(which)h(a)g(certain)g
+(condition)e(holds:)396 4409 y Fq(let)45 b(rec)f(search)g(p)g(t)h(=)486
+4506 y(if)f(p)h(t)g(then)576 4603 y(t)f(::)h(search_list)e(p)h(\(t)h(#)
+g(sub_nodes\))486 4700 y(else)576 4797 y(search_list)e(p)h(\(t)h(#)f
+(sub_nodes\))p Black 3800 5278 a Fr(61)p Black eop
+%%Page: 62 62
+62 61 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 676 a Fq(and)45
+b(search_list)e(p)h(l)h(=)486 773 y(match)f(l)h(with)576
+870 y([])268 b(-)p Fo(>)45 b Fq([])486 967 y(|)g(t)f(::)h(l')f(-)p
+Fo(>)h Fq(\(search)e(p)i(t\))f(@)h(\(search_list)e(p)i(l'\))396
+1065 y(;;)396 1297 y Fv(F)o(or)20 b(e)o(xample,)f(if)h(you)f(w)o(ant)i
+(to)f(search)g(all)h(elements)f(of)f(a)i(certain)f(type)f
+Fq(et)p Fv(,)i(the)f(function)e Fq(search)i Fv(can)g(be)g(applied)396
+1405 y(as)h(follo)n(ws:)396 1585 y Fq(let)45 b(search_element_type)c
+(et)k(t)f(=)486 1682 y(search)g(\(fun)g(x)h(-)p Fo(>)f
+Fq(x)h(#)f(node_type)g(=)g(T_element)g(et\))g(t)396 1779
+y(;;)396 2012 y Fu(Getting)20 b(attrib)n(ute)f(v)o(alues.)h
+Fv(Suppose)f(we)i(ha)n(v)o(e)f(the)g(declaration:)396
+2192 y Fq()396
+2577 y Fv(In)20 b(this)h(case,)f(e)n(v)o(ery)f(element)h
+Fq(e)g Fv(must)h(ha)n(v)o(e)e(an)h(attrib)n(ute)g Fq(a)p
+Fv(,)g(otherwise)g(the)g(parser)g(w)o(ould)f(indicate)h(an)g(error)-5
+b(.)19 b(If)h(the)396 2685 y(O'Caml)h(v)n(ariable)e Fq(n)h
+Fv(holds)g(the)g(node)f(of)h(the)g(tree)h(corresponding)16
+b(to)21 b(the)f(element,)f(you)g(can)h(get)h(the)f(v)n(alue)f(of)h(the)
+396 2793 y(attrib)n(ute)g Fq(a)h Fv(by)396 2973 y Fq(let)45
+b(value_of_a)e(=)h(n)h(#)g(required_string_attribute)40
+b("a")396 3164 y Fv(which)20 b(is)h(more)e(or)h(less)i(an)e(abbre)n
+(viation)d(for)396 3344 y Fq(let)45 b(value_of_a)e(=)486
+3442 y(match)h(n)h(#)f(attribute)g("a")g(with)576 3539
+y(Value)g(s)g(->)h(s)486 3636 y(|)g(_)313 b(->)45 b(assert)f(false)396
+3827 y Fv(-)21 b(as)g(the)f(attrib)n(ute)g(is)h(required,)d(the)i
+Fq(attribute)f Fv(method)g(al)o(w)o(ays)i(returns)e(a)i
+Fq(Value)p Fv(.)396 3976 y(In)f(contrast)g(to)g(this,)h(the)f(attrib)n
+(ute)g Fq(b)g Fv(can)g(be)g(omitted.)g(In)f(this)i(case,)g(the)f
+(method)396 4084 y Fq(required_string_attribute)d Fv(w)o(orks)j(only)f
+(if)h(the)h(attrib)n(ute)f(is)h(there,)e(and)h(the)g(method)f(will)i(f)
+o(ail)f(if)h(the)396 4192 y(attrib)n(ute)f(is)h(missing.)f(T)-7
+b(o)20 b(get)h(the)f(v)n(alue,)f(you)g(can)h(apply)g(the)g(method)f
+Fq(optional_string_attribute)p Fv(:)396 4372 y Fq(let)45
+b(value_of_b)e(=)h(n)h(#)g(optional_string_attribute)40
+b("b")396 4563 y Fv(No)n(w)-5 b(,)20 b Fq(value_of_b)f
+Fv(is)i(of)f(type)g Fq(string)43 b(option)p Fv(,)20 b(and)f
+Fq(None)i Fv(represents)e(the)h(omitted)g(attrib)n(ute.)f(Alternati)n
+(v)o(ely)-5 b(,)396 4671 y(you)20 b(could)f(also)h(use)h
+Fq(attribute)p Fv(:)396 4851 y Fq(let)45 b(value_of_b)e(=)p
+Black 3800 5278 a Fr(62)p Black eop
+%%Page: 63 63
+63 62 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(match)44
+b(n)h(#)f(attribute)g("b")g(with)576 676 y(Value)g(s)313
+b(->)45 b(Some)f(s)486 773 y(|)h(Implied_value)d(->)j(None)486
+870 y(|)g(_)582 b(->)45 b(assert)f(false)396 1103 y Fv(The)20
+b(attrib)n(ute)g Fq(c)h Fv(beha)n(v)o(es)e(much)g(lik)o(e)h
+Fq(a)p Fv(,)h(because)e(it)i(has)g(al)o(w)o(ays)f(a)h(v)n(alue.)e(If)h
+(the)g(attrib)n(ute)g(is)h(omitted,)f(the)g(def)o(ault,)396
+1211 y(here)g("12345",)e(will)j(be)f(returned)e(instead.)i(Because)g
+(of)g(this,)h(you)e(can)h(again)f(use)396 1319 y Fq
+(required_string_attribute)e Fv(to)j(get)g(the)h(v)n(alue.)396
+1468 y(The)f(type)g Fq(CDATA)g Fv(is)h(the)f(most)g(general)f(string)h
+(type.)g(The)g(types)g Fq(NMTOKEN)p Fv(,)f Fq(ID)p Fv(,)h
+Fq(IDREF)p Fv(,)g Fq(ENTITY)p Fv(,)f(and)h(all)396 1576
+y(enumerators)e(and)i(notations)f(are)h(special)h(forms)e(of)h(string)g
+(types)g(that)g(restrict)g(the)h(possible)f(v)n(alues.)f(From)396
+1684 y(O'Caml,)h(the)o(y)g(beha)n(v)o(e)f(lik)o(e)h Fq(CDATA)p
+Fv(,)g(i.e.)g(you)f(can)h(use)h(the)f(methods)f Fq
+(required_string_attribute)e Fv(and)396 1792 y Fq
+(optional_string_attribute)p Fv(,)g(too.)396 1941 y(In)j(contrast)g(to)
+g(this,)h(the)f(types)g Fq(NMTOKENS)p Fv(,)f Fq(IDREFS)p
+Fv(,)g(and)h Fq(ENTITIES)g Fv(mean)f(lists)j(of)e(strings.)g(Suppose)f
+(we)h(ha)n(v)o(e)396 2049 y(the)g(declaration:)396 2229
+y Fq()396 2517 y Fv(The)20
+b(type)g Fq(NMTOKENS)f Fv(stands)i(for)e(lists)j(of)e(space-separated)e
+(tok)o(ens;)i(for)f(e)o(xample)g(the)h(v)n(alue)g Fq("1)44
+b(abc)h(23ef")396 2625 y Fv(means)20 b(the)g(list)i Fq(["1";)44
+b("abc";)f("23ef"])p Fv(.)20 b(\(Again,)e Fq(IDREFS)i
+Fv(and)g Fq(ENTITIES)f Fv(ha)n(v)o(e)h(more)f(restricted)h(v)n
+(alues.\))396 2733 y(T)-7 b(o)21 b(get)f(the)g(v)n(alue)g(of)f(attrib)n
+(ute)h Fq(d)p Fv(,)h(one)e(can)h(use)396 2913 y Fq(let)45
+b(value_of_d)e(=)h(n)h(#)g(required_list_attribute)c("d")396
+3104 y Fv(or)396 3285 y Fq(let)k(value_of_d)e(=)486 3382
+y(match)h(n)h(#)f(attribute)g("d")g(with)576 3479 y(Valuelist)f(l)i(->)
+f(l)486 3576 y(|)h(_)493 b(->)44 b(assert)g(false)396
+3767 y Fv(As)21 b Fq(d)g Fv(is)g(required,)d(the)i(attrib)n(ute)g
+(cannot)f(be)h(omitted,)g(and)f(the)h Fq(attribute)g
+Fv(method)e(returns)i(al)o(w)o(ays)g(a)396 3875 y Fq(Valuelist)p
+Fv(.)396 4024 y(F)o(or)g(optional)f(attrib)n(utes)h(lik)o(e)h
+Fq(e)p Fv(,)f(apply)396 4204 y Fq(let)45 b(value_of_e)e(=)h(n)h(#)g
+(optional_list_attribute)c("e")396 4395 y Fv(or)396 4576
+y Fq(let)k(value_of_e)e(=)486 4673 y(match)h(n)h(#)f(attribute)g("e")g
+(with)576 4770 y(Valuelist)f(l)134 b(->)45 b(l)486 4867
+y(|)g(Implied_value)d(->)j([])p Black 3800 5278 a Fr(63)p
+Black eop
+%%Page: 64 64
+64 63 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(|)45
+b(_)582 b(->)45 b(assert)f(false)396 770 y Fv(Here,)20
+b(the)g(case)h(that)f(the)g(attrib)n(ute)g(is)h(missing)f(counts)g(lik)
+o(e)g(the)h(empty)e(list.)-2 1139 y Fp(3.2.7.)35 b(Iterator)n(s)396
+1307 y Fv(There)20 b(are)g(also)g(se)n(v)o(eral)g(iterators)g(in)g
+(Pxp_document;)d(please)j(see)h(the)f(mli)h(\002le)f(for)g(details.)g
+(Y)-9 b(ou)20 b(can)g(\002nd)396 1415 y(e)o(xamples)f(for)h(them)g(in)g
+(the)g("simple_transformation")d(directory)-5 b(.)396
+1595 y Fq(val)45 b(find)f(:)g(?deeply:bool)f(->)889 1692
+y(f:\('ext)h(node)g(->)h(bool\))f(->)g('ext)g(node)h(->)f('ext)g(node)
+396 1887 y(val)h(find_all)e(:)i(?deeply:bool)e(->)1069
+1984 y(f:\('ext)g(node)i(->)f(bool\))g(->)h('ext)f(node)g(->)g('ext)h
+(node)f(list)396 2178 y(val)h(find_element)d(:)j(?deeply:bool)e(->)1248
+2275 y(string)h(->)g('ext)h(node)f(->)g('ext)g(node)396
+2469 y(val)h(find_all_elements)d(:)i(?deeply:bool)f(->)1472
+2567 y(string)h(->)h('ext)f(node)g(->)g('ext)h(node)f(list)396
+2761 y(exception)g(Skip)396 2858 y(val)h(map_tree)e(:)90
+b(pre:\('exta)43 b(node)h(->)g('extb)g(node\))g(->)1069
+2955 y(?post:\('extb)f(node)h(->)g('extb)g(node\))g(->)1069
+3052 y('exta)g(node)g(->)1248 3149 y('extb)g(node)396
+3441 y(val)h(map_tree_sibl)d(:)755 3538 y(pre:)i(\('exta)g(node)g
+(option)g(->)g('exta)g(node)h(->)f('exta)g(node)g(option)g(->)1203
+3635 y('extb)g(node\))g(->)710 3732 y(?post:\('extb)f(node)h(option)g
+(->)g('extb)g(node)h(->)f('extb)g(node)g(option)g(->)1203
+3829 y('extb)g(node\))g(->)710 3927 y('exta)g(node)g(->)889
+4024 y('extb)g(node)396 4218 y(val)h(iter_tree)e(:)i(?pre:\('ext)e
+(node)h(->)g(unit\))g(->)1114 4315 y(?post:\('ext)f(node)h(->)g(unit\))
+g(->)1114 4412 y('ext)g(node)g(->)1293 4509 y(unit)396
+4704 y(val)h(iter_tree_sibl)d(:)710 4801 y(?pre:)i(\('ext)g(node)g
+(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g(unit\))g
+(->)p Black 3800 5278 a Fr(64)p Black eop
+%%Page: 65 65
+65 64 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 710 579 a Fq(?post:\('ext)43
+b(node)h(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g
+(unit\))g(->)710 676 y('ext)g(node)g(->)889 773 y(unit)-2
+1358 y Fx(3.3.)39 b(The)g(c)m(lass)g(type)g Fb(extension)396
+1610 y Fq(class)44 b(type)g([)h('node)f(])h(extension)e(=)486
+1707 y(object)h(\('self\))576 1804 y(method)f(clone)h(:)h('self)665
+1901 y(\(*)g("clone")e(should)h(return)g(an)h(exact)f(deep)g(copy)g(of)
+g(the)h(object.)e(*\))576 1998 y(method)g(node)i(:)f('node)665
+2095 y(\(*)h("node")f(returns)f(the)i(corresponding)d(node)i(of)h(this)
+f(extension.)f(This)h(method)710 2193 y(*)h(intended)e(to)i(return)f
+(exactly)f(what)h(previ-)396 2290 y(ously)g(has)h(been)f(set)g(by)h
+("set_node".)710 2387 y(*\))576 2484 y(method)e(set_node)h(:)h('node)f
+(->)g(unit)665 2581 y(\(*)h("set_node")e(is)h(invoked)g(once)g(the)h
+(extension)e(is)h(associated)g(to)g(a)h(new)710 2678
+y(*)g(node)f(object.)710 2775 y(*\))486 2873 y(end)396
+3063 y Fv(This)21 b(is)g(the)f(type)g(of)g(classes)h(used)f(for)f(node)
+h(e)o(xtensions.)e(F)o(or)i(e)n(v)o(ery)f(node)g(of)h(the)g(document)e
+(tree,)i(there)g(is)h(not)396 3171 y(only)f(the)g Fq(node)g
+Fv(object,)f(b)n(ut)h(also)g(an)g Fq(extension)f Fv(object.)h(The)f
+(latter)i(has)f(minimal)f(functionality;)f(it)j(has)f(only)g(the)396
+3279 y(necessary)g(methods)f(to)h(be)g(attached)g(to)g(the)g(node)f
+(object)h(containing)e(the)j(details)f(of)g(the)g(node)f(instance.)h
+(The)396 3387 y(e)o(xtension)f(object)h(is)h(called)f(e)o(xtension)f
+(because)g(its)i(purpose)e(is)i(e)o(xtensibility)-5 b(.)396
+3537 y(F)o(or)20 b(some)g(reasons,)g(it)h(is)g(impossible)e(to)i(deri)n
+(v)o(e)d(the)j Fq(node)f Fv(classes)h(\(i.e.)f Fq(element_impl)f
+Fv(and)g Fq(data_impl)p Fv(\))g(such)396 3645 y(that)i(the)f
+(subclasses)g(can)g(be)g(e)o(xtended)f(by)g(ne)n(w)h(ne)n(w)g(methods.)
+f(But)i(subclassing)f(nodes)f(is)i(a)g(great)f(feature,)396
+3753 y(because)g(it)h(allo)n(ws)f(the)g(user)g(to)h(pro)o(vide)d(dif)n
+(ferent)g(classes)k(for)d(dif)n(ferent)g(types)h(of)g(nodes.)f(The)h(e)
+o(xtension)f(objects)396 3860 y(are)h(a)h(w)o(orkaround)c(that)j(is)i
+(as)e(po)n(werful)f(as)i(direct)f(subclassing,)f(the)h(costs)h(are)f
+(some)g(notation)f(o)o(v)o(erhead.)p Black 3800 5278
+a Fr(65)p Black eop
+%%Page: 66 66
+66 65 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-6.)f
+(The)i(structur)o(e)f(of)g(nodes)g(and)h(extensions)396
+1928 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 1928 a @beginspecial 0 @llx 0 @lly 206 @urx
+140 @ury 2060 @rwi @setspecial
+%%BeginDocument: pic/extension_general.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/extension_general.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 206 140
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-22.0 205.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5050 m -1000 -1000 l 5514 -1000 l 5514 5050 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 1575 2250 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1575 3375 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 675 3375 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2475 3375 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3600 2475 180 180 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2880 2475 180 180 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 4320 2475 186 186 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3600 1485 186 186 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+n 675 3150 m 1395 2385 l gs col0 s gr
+% Polyline
+n 1575 2475 m 1575 3150 l gs col0 s gr
+% Polyline
+n 1755 2385 m 2475 3150 l gs col0 s gr
+% Polyline
+ [60] 0 sd
+gs clippath
+3288 1467 m 3412 1462 l 3305 1524 l 3435 1487 l 3418 1429 l cp
+clip
+n 1537 2010 m 3412 1462 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 3288 1467 m 3412 1462 l 3305 1524 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+1796 2042 m 1672 2047 l 1779 1984 l 1649 2022 l 1666 2080 l cp
+clip
+n 3412 1537 m 1672 2047 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 1796 2042 m 1672 2047 l 1779 1984 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+2584 2524 m 2707 2512 l 2604 2581 l 2731 2535 l 2711 2479 l cp
+933 3183 m 810 3195 l 913 3126 l 786 3172 l 806 3228 l cp
+clip
+n 810 3195 m 2707 2512 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 933 3183 m 810 3195 l 913 3126 l col0 s
+% arrowhead
+n 2584 2524 m 2707 2512 l 2604 2581 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+3319 2594 m 3442 2580 l 3340 2650 l 3467 2603 l 3446 2547 l cp
+1863 3203 m 1740 3217 l 1842 3147 l 1715 3194 l 1736 3250 l cp
+clip
+n 1740 3217 m 3442 2580 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 1863 3203 m 1740 3217 l 1842 3147 l col0 s
+% arrowhead
+n 3319 2594 m 3442 2580 l 3340 2650 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+4054 2626 m 4177 2610 l 4076 2682 l 4202 2632 l 4180 2577 l cp
+2763 3194 m 2640 3210 l 2741 3138 l 2615 3188 l 2637 3243 l cp
+clip
+n 2640 3210 m 4177 2610 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 2763 3194 m 2640 3210 l 2741 3138 l col0 s
+% arrowhead
+n 4054 2626 m 4177 2610 l 4076 2682 l col0 s
+/Courier-Bold ff 180.00 scf sf
+3555 1530 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+1530 2295 m
+gs 1 -1 sc (n) col0 sh gr
+/Courier ff 180.00 scf sf
+1658 1950 m
+gs 1 -1 sc 17.0 rot (n # extension) col0 sh gr
+/Courier ff 180.00 scf sf
+2475 1950 m
+gs 1 -1 sc 17.0 rot (x # node) col0 sh gr
+/Helvetica ff 180.00 scf sf
+1020 4050 m
+gs 1 -1 sc (The node tree) col0 sh gr
+/Helvetica ff 180.00 scf sf
+3225 3285 m
+gs 1 -1 sc (The extensions) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 1928 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 1928 a 357 x Fv(The)f(picture)f(sho)n(ws)i
+(ho)n(w)e(the)i(nodes)e(and)h(e)o(xtensions)f(are)h(link)o(ed)f
+(together)-5 b(.)19 b(Ev)o(ery)g(node)g(has)i(a)f(reference)f(to)h(its)
+396 2393 y(e)o(xtension,)f(and)g(e)n(v)o(ery)g(e)o(xtension)g(has)h(a)h
+(reference)d(to)j(its)g(node.)e(The)h(methods)f Fq(extension)g
+Fv(and)h Fq(node)g Fv(follo)n(w)396 2501 y(these)h(references;)e(a)h
+(typical)g(phrase)f(is)396 2681 y Fq(self)44 b(#)h(node)f(#)h
+(attribute)e("xy")396 2872 y Fv(to)21 b(get)f(the)g(v)n(alue)g(of)f(an)
+i(attrib)n(ute)e(from)h(a)g(method)f(de\002ned)g(in)h(the)h(e)o
+(xtension)d(object;)i(or)396 3053 y Fq(self)44 b(#)h(node)f(#)h(iter)
+486 3150 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)f(extension)g(#)g(my_method)
+g(...\))396 3341 y Fv(to)21 b(iterate)f(o)o(v)o(er)f(the)h(subnodes)f
+(and)g(to)i(call)f Fq(my_method)f Fv(of)h(the)h(corresponding)16
+b(e)o(xtension)j(objects.)396 3490 y(Note)h(that)h(e)o(xtension)d
+(objects)i(do)g(not)g(ha)n(v)o(e)g(references)e(to)j(subnodes)e(\(or)g
+("sube)o(xtensions"\))f(themselv)o(es;)h(in)i(order)396
+3598 y(to)g(get)f(one)f(of)h(the)h(children)d(of)i(an)g(e)o(xtension)f
+(you)g(must)i(\002rst)g(go)e(to)i(the)f(node)f(object,)h(then)f(get)h
+(the)h(child)e(node,)396 3706 y(and)h(\002nally)g(reach)f(the)i(e)o
+(xtension)d(that)j(is)g(logically)e(the)h(child)g(of)g(the)g(e)o
+(xtension)f(you)g(started)h(with.)-2 4034 y Fp(3.3.1.)35
+b(Ho)n(w)f(to)f(de\002ne)h(an)g(e)n(xtension)i(c)n(lass)396
+4202 y Fv(At)21 b(minimum,)e(you)g(must)h(de\002ne)g(the)g(methods)f
+Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)f Fq(set_node)h
+Fv(such)f(that)i(your)e(class)i(is)396 4310 y(compatible)e(with)h(the)h
+(type)e Fq(extension)p Fv(.)g(The)h(method)f Fq(set_node)g
+Fv(is)i(called)f(during)f(the)h(initialization)g(of)g(the)396
+4418 y(node,)f(or)h(after)g(a)h(node)e(has)h(been)g(cloned;)f(the)h
+(node)f(object)h(in)m(v)n(ok)o(es)f Fq(set_node)g Fv(on)h(the)g(e)o
+(xtension)f(object)h(to)g(tell)396 4526 y(it)h(that)f(this)h(node)e(is)
+i(no)n(w)f(the)g(object)g(the)g(e)o(xtension)f(is)i(link)o(ed)f(to.)g
+(The)f(e)o(xtension)g(must)h(return)f(the)i(node)e(object)396
+4633 y(passed)h(as)h(ar)o(gument)d(of)i Fq(set_node)f
+Fv(when)h(the)g Fq(node)g Fv(method)f(is)i(called.)p
+Black 3798 5278 a Fr(66)p Black eop
+%%Page: 67 67
+67 66 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fv(The)g
+Fq(clone)g Fv(method)f(must)h(return)f(a)i(cop)o(y)e(of)h(the)g(e)o
+(xtension)f(object;)h(at)g(least)h(the)f(object)g(itself)h(must)f(be)
+396 687 y(duplicated,)f(b)n(ut)h(if)g(required,)e(the)j(cop)o(y)e
+(should)g(deeply)g(duplicate)g(all)i(objects)f(and)g(v)n(alues)g(that)g
+(are)g(referred)e(by)396 795 y(the)i(e)o(xtension,)f(too.)h(Whether)f
+(this)i(is)g(required,)d(depends)h(on)h(the)g(application;)f
+Fq(clone)h Fv(is)h(in)m(v)n(ok)o(ed)d(by)i(the)g(node)396
+903 y(object)g(when)g(one)f(of)h(its)h(cloning)e(methods)g(is)i
+(called.)396 1052 y(A)g(good)e(starting)h(point)f(for)h(an)g(e)o
+(xtension)e(class:)396 1232 y Fq(class)44 b(custom_extension)e(=)486
+1329 y(object)i(\(self\))576 1524 y(val)g(mutable)g(node)g(=)g(\(None)g
+(:)h(custom_extension)d(node)i(option\))576 1718 y(method)f(clone)h(=)h
+({<)g(>})576 1912 y(method)e(node)i(=)665 2009 y(match)f(node)g(with)
+845 2107 y(None)g(->)934 2204 y(assert)g(false)755 2301
+y(|)h(Some)f(n)g(->)h(n)576 2495 y(method)e(set_node)h(n)h(=)665
+2592 y(node)f(<-)h(Some)f(n)486 2786 y(end)396 2977 y
+Fv(This)21 b(class)g(is)g(compatible)e(with)h Fq(extension)p
+Fv(.)f(The)h(purpose)e(of)i(de\002ning)f(such)h(a)h(class)g(is,)g(of)f
+(course,)f(adding)396 3085 y(further)g(methods;)g(and)h(you)f(can)h(do)
+g(it)h(without)e(restriction.)396 3235 y(Often,)h(you)f(w)o(ant)h(not)g
+(only)g(one)f(e)o(xtension)g(class.)i(In)f(this)h(case,)f(it)h(is)g
+(the)f(simplest)h(w)o(ay)f(that)g(all)h(your)e(classes)i(\(for)396
+3343 y(one)f(kind)f(of)h(document\))e(ha)n(v)o(e)i(the)g(same)g(type)g
+(\(with)g(respect)g(to)g(the)g(interf)o(ace;)g(i.e.)g(it)h(does)f(not)g
+(matter)g(if)g(your)396 3451 y(classes)i(dif)n(fer)d(in)h(the)g
+(de\002ned)f(pri)n(v)n(ate)h(methods)f(and)g(instance)h(v)n(ariables,)f
+(b)n(ut)h(public)g(methods)f(count\).)f(This)396 3559
+y(approach)g(a)n(v)n(oids)i(lots)h(of)f(coercions)f(and)h(problems)e
+(with)j(type)f(incompatibilities.)e(It)j(is)g(simple)f(to)g(implement:)
+396 3739 y Fq(class)44 b(custom_extension)e(=)486 3836
+y(object)i(\(self\))576 3933 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h
+(custom_extension)d(node)i(option\))576 4127 y(method)f(clone)h(=)h
+(...)269 b(\(*)44 b(see)g(above)g(*\))576 4224 y(method)f(node)i(=)f
+(...)314 b(\(*)44 b(see)g(above)g(*\))576 4322 y(method)f(set_node)h(n)
+h(=)f(...)h(\(*)f(see)g(above)g(*\))576 4516 y(method)f(virtual)h
+(my_method1)f(:)i(...)576 4613 y(method)e(virtual)h(my_method2)f(:)i
+(...)576 4710 y(...)f(\(*)g(etc.)h(*\))486 4807 y(end)p
+Black 3797 5278 a Fr(67)p Black eop
+%%Page: 68 68
+68 67 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 676 a Fq(class)44
+b(custom_extension_kind_A)d(=)486 773 y(object)j(\(self\))576
+870 y(inherit)f(custom_extension)576 1065 y(method)g(my_method1)h(=)g
+(...)576 1162 y(method)f(my_method2)h(=)g(...)486 1259
+y(end)396 1453 y(class)g(custom_extension_kind_B)d(=)486
+1550 y(object)j(\(self\))576 1647 y(inherit)f(custom_extension)576
+1842 y(method)g(my_method1)h(=)g(...)576 1939 y(method)f(my_method2)h
+(=)g(...)486 2036 y(end)396 2227 y Fv(If)20 b(a)h(class)g(does)f(not)g
+(need)f(a)i(method)e(\(e.g.)g(because)h(it)h(does)e(not)h(mak)o(e)g
+(sense,)g(or)g(it)h(w)o(ould)f(violate)f(some)396 2335
+y(important)g(condition\),)f(it)j(is)g(possible)f(to)g(de\002ne)g(the)g
+(method)f(and)g(to)i(al)o(w)o(ays)f(raise)h(an)f(e)o(xception)e(when)i
+(the)396 2443 y(method)f(is)i(in)m(v)n(ok)o(ed)e(\(e.g.)g
+Fq(assert)44 b(false)p Fv(\).)396 2592 y(The)20 b(latter)g(is)i(a)e
+(strong)g(recommendation:)c(do)k(not)g(try)g(to)g(further)f(specialize)
+h(the)g(types)g(of)g(e)o(xtension)f(objects.)h(It)g(is)396
+2700 y(dif)n(\002cult,)g(sometimes)g(e)n(v)o(en)f(impossible,)g(and)h
+(almost)g(ne)n(v)o(er)f(w)o(orth-while.)-2 3070 y Fp(3.3.2.)35
+b(Ho)n(w)f(to)f(bind)h(e)n(xtension)h(c)n(lasses)h(to)d(element)i
+(types)396 3237 y Fv(Once)20 b(you)f(ha)n(v)o(e)h(de\002ned)f(your)g(e)
+o(xtension)g(classes,)i(you)e(can)h(bind)g(them)f(to)i(element)e
+(types.)h(The)g(simplest)h(case)f(is)396 3345 y(that)h(you)e(ha)n(v)o
+(e)g(only)h(one)f(class)j(and)d(that)i(this)f(class)h(is)h(to)e(be)g
+(al)o(w)o(ays)h(used.)e(The)h(parsing)f(functions)g(in)h(the)h(module)
+396 3453 y Fq(Pxp_yacc)f Fv(tak)o(e)g(a)h Fq(spec)f Fv(ar)o(gument)d
+(which)j(can)g(be)g(customized.)f(If)h(your)f(single)h(class)h(has)g
+(the)f(name)f Fq(c)p Fv(,)i(this)396 3561 y(ar)o(gument)d(should)h(be)
+396 3741 y Fq(let)45 b(spec)f(=)486 3839 y(make_spec_from_alist)576
+3936 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576
+4033 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576
+4130 y(~element_alist:)535 b([])576 4227 y(\(\))396 4418
+y Fv(This)21 b(means)f(that)g(data)g(nodes)f(will)i(be)f(created)g
+(from)f(the)h(e)o(x)o(emplar)e(passed)i(by)g(~data_e)o(x)o(emplar)d
+(and)j(that)g(all)396 4526 y(element)g(nodes)f(will)i(be)f(made)g(from)
+f(the)h(e)o(x)o(emplar)e(speci\002ed)i(by)g(~def)o(ault_element_e)o(x)o
+(emplar)-5 b(.)15 b(In)396 4634 y(~element_alist,)k(you)h(can)g(pass)g
+(that)h(dif)n(ferent)d(e)o(x)o(emplars)h(are)h(to)g(be)g(used)g(for)g
+(dif)n(ferent)e(element)i(types;)g(b)n(ut)g(this)396
+4742 y(is)h(an)g(optional)d(feature.)h(If)h(you)g(do)g(not)f(need)h
+(it,)h(pass)f(the)g(empty)g(list.)p Black 3800 5278 a
+Fr(68)p Black eop
+%%Page: 69 69
+69 68 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fv(Remember)f(that)i(an)f
+(e)o(x)o(emplar)e(is)j(a)g(\(node,)d(e)o(xtension\))h(pair)g(that)i
+(serv)o(es)f(as)h(pattern)e(when)h(ne)n(w)g(nodes)f(\(and)g(the)396
+687 y(corresponding)e(e)o(xtension)i(objects\))g(are)h(added)f(to)i
+(the)f(document)e(tree.)i(In)g(this)h(case,)f(the)g(e)o(x)o(emplar)f
+(contains)g Fq(c)i Fv(as)396 795 y(e)o(xtension,)e(and)g(when)h(nodes)f
+(are)i(created,)e(the)h(e)o(x)o(emplar)e(is)j(cloned,)e(and)h(cloning)f
+(mak)o(es)h(also)g(a)h(cop)o(y)e(of)h Fq(c)h Fv(such)396
+903 y(that)g(all)f(nodes)g(of)g(the)g(document)e(tree)i(will)h(ha)n(v)o
+(e)f(a)g(cop)o(y)g(of)g Fq(c)g Fv(as)h(e)o(xtension.)396
+1052 y(The)f Fq(~element_alist)f Fv(ar)o(gument)e(can)j(bind)g
+(speci\002c)g(element)g(types)g(to)g(speci\002c)g(e)o(x)o(emplars;)f
+(as)i(e)o(x)o(emplars)396 1160 y(may)f(be)g(instances)g(of)g(dif)n
+(ferent)f(classes)i(it)g(is)g(ef)n(fecti)n(v)o(ely)d(possible)i(to)h
+(bind)e(element)h(types)g(to)g(classes.)h(F)o(or)396
+1268 y(e)o(xample,)e(if)h(the)g(element)g(type)g("p")g(is)h
+(implemented)d(by)i(class)h("c_p",)e(and)h("q")g(is)h(realized)f(by)f
+("c_q",)h(you)f(can)396 1376 y(pass)i(the)f(follo)n(wing)f(v)n(alue:)
+396 1556 y Fq(let)45 b(spec)f(=)486 1653 y(make_spec_from_alist)576
+1750 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576
+1847 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576
+1945 y(~element_alist:)665 2042 y([)i("p",)f(new)g(element_impl)f(c_p;)
+755 2139 y("q",)h(new)g(element_impl)f(c_q;)665 2236
+y(])576 2333 y(\(\))396 2524 y Fv(The)20 b(e)o(xtension)f(object)h
+Fq(c)g Fv(is)h(still)h(used)e(for)f(all)i(data)f(nodes)f(and)h(for)g
+(all)g(other)g(element)f(types.)-2 3026 y Fx(3.4.)39
+b(Details)f(of)i(the)f(mapping)e(fr)m(om)i(XML)g(te)n(xt)g(to)g(the)g
+(tree)-2 3212 y(representation)-2 3540 y Fp(3.4.1.)c(The)f
+(representation)h(of)e(c)o(haracter)n(-free)h(elements)396
+3708 y Fv(If)20 b(an)g(element)g(declaration)f(does)h(not)f(allo)n(w)i
+(the)f(element)f(to)i(contain)e(character)g(data,)h(the)g(follo)n(wing)
+e(rules)j(apply)-5 b(.)396 3858 y(If)20 b(the)h(element)e(must)h(be)g
+(empty)-5 b(,)19 b(i.e.)h(it)h(is)g(declared)e(with)i(the)f(k)o(e)o(yw)
+o(ord)e Fq(EMPTY)p Fv(,)i(the)g(element)g(instance)g(must)g(be)396
+3965 y(ef)n(fecti)n(v)o(ely)f(empty)g(\(it)h(must)h(not)f(e)n(v)o(en)f
+(contain)g(whitespace)h(characters\).)e(The)i(parser)g(guarantees)e
+(that)j(a)f(declared)396 4073 y Fq(EMPTY)g Fv(element)g(does)g(ne)n(v)o
+(er)f(contain)g(a)h(data)g(node,)f(e)n(v)o(en)g(if)i(the)f(data)g(node)
+f(represents)h(the)g(empty)f(string.)396 4223 y(If)h(the)h(element)e
+(declaration)g(only)g(permits)h(other)f(elements)h(to)h(occur)e(within)
+h(that)g(element)g(b)n(ut)g(not)g(character)396 4331
+y(data,)g(it)h(is)g(still)g(possible)f(to)h(insert)f(whitespace)g
+(characters)f(between)g(the)h(subelements.)f(The)h(parser)g(ignores)f
+(these)396 4439 y(characters,)g(too,)h(and)g(does)f(not)h(create)g
+(data)g(nodes)g(for)f(them.)396 4588 y Fu(Example.)h
+Fv(Consider)g(the)g(follo)n(wing)f(element)g(types:)396
+4768 y Fq()396
+4865 y()p Black 3800
+5278 a Fr(69)p Black eop
+%%Page: 70 70
+70 69 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq()396 770 y Fv(Only)20 b Fq(x)h Fv(may)e(contain)h
+(character)e(data,)i(the)h(k)o(e)o(yw)o(ord)d Fq(#PCDATA)h
+Fv(indicates)h(this.)h(The)f(other)f(types)h(are)396
+878 y(character)n(-free.)396 1027 y(The)g(XML)g(term)396
+1207 y Fq( )44 b( )396 1398 y Fv(will)21
+b(be)f(internally)f(represented)g(by)g(an)i(element)e(node)g(for)h
+Fq(x)g Fv(with)h(three)f(subnodes:)e(the)j(\002rst)g
+Fq(z)f Fv(element,)g(a)g(data)396 1506 y(node)f(containing)g(the)h
+(space)g(character)m(,)e(and)i(the)g(second)g Fq(z)g
+Fv(element.)g(In)f(contrast)h(to)g(this,)h(the)f(term)396
+1686 y Fq( )44 b( )396 1877 y Fv(is)21 b(represented)e(by)
+h(an)g(element)f(node)g(for)h Fq(y)h Fv(with)f(only)f
+Fr(two)i Fv(subnodes,)e(the)h(tw)o(o)g Fq(z)h Fv(elements.)e(There)h
+(is)h(no)f(data)396 1985 y(node)f(for)h(the)g(space)g(character)f
+(because)h(spaces)g(are)g(ignored)f(in)h(the)g(character)n(-free)e
+(element)i Fq(y)p Fv(.)-2 2355 y Fp(3.4.2.)35 b(The)f(representation)h
+(of)e(c)o(haracter)h(data)396 2523 y Fv(The)20 b(XML)g(speci\002cation)
+g(allo)n(ws)g(all)h(Unicode)e(characters)g(in)i(XML)f(te)o(xts.)g(This)
+g(parser)g(can)g(be)g(con\002gured)e(such)396 2631 y(that)j(UTF-8)e(is)
+i(used)f(to)h(represent)e(the)h(characters)f(internally;)g(ho)n(we)n(v)
+o(er)m(,)f(the)i(def)o(ault)g(character)e(encoding)h(is)396
+2738 y(ISO-8859-1.)e(\(Currently)-5 b(,)18 b(no)i(other)f(encodings)g
+(are)h(possible)g(for)f(the)i(internal)e(string)h(representation;)e
+(the)i(type)396 2846 y Fq(Pxp_types.rep_encoding)d Fv(enumerates)i(the)
+h(possible)g(encodings.)e(Principially)-5 b(,)19 b(the)h(parser)g
+(could)f(use)h(an)o(y)396 2954 y(encoding)e(that)j(is)g
+(ASCII-compatible,)d(b)n(ut)i(there)g(are)g(currently)e(only)i(le)o
+(xical)f(analyzers)h(for)f(UTF-8)h(and)396 3062 y(ISO-8859-1.)d(It)k
+(is)g(currently)d(impossible)i(to)g(use)h(UTF-16)e(or)h(UCS-4)g(as)h
+(internal)f(encodings)e(\(or)i(other)f(multibyte)396
+3170 y(encodings)g(which)g(are)h(not)g(ASCII-compatible\))e(unless)i
+(major)g(parts)g(of)g(the)g(parser)g(are)g(re)n(written)f(-)i(unlik)o
+(ely)-5 b(...\))396 3320 y(The)20 b(internal)g(encoding)e(may)h(be)h
+(dif)n(ferent)f(from)g(the)h(e)o(xternal)f(encoding)f(\(speci\002ed)i
+(in)g(the)g(XML)h(declaration)396 3428 y Fo(<)p Fq(?xml)44
+b(...)g(encoding="..."?)p Fo(>)p Fv(\);)18 b(in)j(this)f(case)h(the)f
+(strings)g(are)g(automatically)f(con)m(v)o(erted)f(to)i(the)g(internal)
+396 3535 y(encoding.)396 3685 y(If)g(the)h(internal)e(encoding)f(is)j
+(ISO-8859-1,)c(it)k(is)g(possible)f(that)g(there)g(are)g(characters)g
+(that)g(cannot)f(be)h(represented.)396 3793 y(In)g(this)h(case,)f(the)g
+(parser)g(ignores)f(such)h(characters)f(and)h(prints)g(a)h(w)o(arning)e
+(\(to)h(the)g Fq(collect_warning)e Fv(object)396 3901
+y(that)j(must)f(be)g(passed)g(when)g(the)g(parser)f(is)i(called\).)396
+4050 y(The)f(XML)g(speci\002cation)g(allo)n(ws)g(lines)h(to)f(be)g
+(separated)g(by)f(single)h(LF)h(characters,)e(by)h(CR)h(LF)g(character)
+396 4158 y(sequences,)e(or)h(by)g(single)g(CR)i(characters.)d
+(Internally)-5 b(,)18 b(these)i(separators)f(are)h(al)o(w)o(ays)h(con)m
+(v)o(erted)d(to)i(single)g(LF)396 4266 y(characters.)396
+4416 y(The)g(parser)g(guarantees)e(that)j(there)e(are)i(ne)n(v)o(er)d
+(tw)o(o)j(adjacent)e(data)h(nodes;)g(if)g(necessary)-5
+b(,)19 b(data)h(material)g(that)g(w)o(ould)396 4523 y(otherwise)g(be)g
+(represented)e(by)i(se)n(v)o(eral)g(nodes)f(is)i(collapsed)f(into)f
+(one)h(node.)f(Note)h(that)g(you)g(can)g(still)h(create)f(node)396
+4631 y(trees)h(with)f(adjacent)g(data)g(nodes;)f(ho)n(we)n(v)o(er)m(,)f
+(the)i(parser)g(does)f(not)h(return)f(such)h(trees.)p
+Black 3800 5278 a Fr(70)p Black eop
+%%Page: 71 71
+71 70 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fv(Note)g(that)h(CD)m(A)
+-9 b(T)h(A)20 b(sections)g(are)g(not)g(represented)f(specially;)h(such)
+g(sections)g(are)g(added)f(to)h(the)h(current)d(data)396
+687 y(material)i(that)g(being)g(collected)f(for)h(the)g(ne)o(xt)f(data)
+h(node.)-2 1056 y Fp(3.4.3.)35 b(The)f(representation)h(of)e(entities)h
+(within)g(documents)396 1224 y Fr(Entities)21 b(ar)m(e)f(not)g(r)m(epr)
+m(esented)f(within)i(documents!)d Fv(If)i(the)h(parser)e(\002nds)h(an)h
+(entity)e(reference)g(in)h(the)g(document)396 1332 y(content,)f(the)h
+(reference)f(is)i(immediately)e(e)o(xpanded,)e(and)j(the)g(parser)g
+(reads)g(the)g(e)o(xpansion)e(te)o(xt)i(instead)g(of)g(the)396
+1440 y(reference.)-2 1810 y Fp(3.4.4.)35 b(The)f(representation)h(of)e
+(attrib)n(utes)396 1977 y Fv(As)21 b(attrib)n(ute)f(v)n(alues)g(are)g
+(composed)e(of)i(Unicode)f(characters,)g(too,)h(the)g(same)h(problems)d
+(with)j(the)f(character)396 2085 y(encoding)e(arise)j(as)g(for)e
+(character)g(material.)h(Attrib)n(ute)g(v)n(alues)g(are)g(con)m(v)o
+(erted)d(to)k(the)f(internal)f(encoding,)f(too;)i(and)396
+2193 y(if)h(there)e(are)i(characters)e(that)h(cannot)f(be)h
+(represented,)e(these)j(are)f(dropped,)e(and)h(a)i(w)o(arning)e(is)i
+(printed.)396 2343 y(Attrib)n(ute)f(v)n(alues)g(are)g(normalized)e
+(before)h(the)o(y)h(are)g(returned)e(by)i(methods)f(lik)o(e)h
+Fq(attribute)p Fv(.)f(First,)i(an)o(y)396 2451 y(remaining)e(entity)h
+(references)e(are)i(e)o(xpanded;)e(if)j(necessary)-5
+b(,)19 b(e)o(xpansion)f(is)j(performed)c(recursi)n(v)o(ely)-5
+b(.)18 b(Second,)396 2558 y(ne)n(wline)i(characters)f(\(an)o(y)g(of)h
+(LF)-7 b(,)21 b(CR)g(LF)-7 b(,)21 b(or)f(CR)h(characters\))e(are)h(con)
+m(v)o(erted)e(to)i(single)g(space)h(characters.)e(Note)396
+2666 y(that)i(especially)e(the)i(latter)f(action)g(is)h(prescribed)d
+(by)i(the)g(XML)g(standard)f(\(b)n(ut)41 b(is)21 b(not)f(con)m(v)o
+(erted)e(such)i(that)g(it)h(is)396 2774 y(still)h(possible)e(to)g
+(include)f(line)h(feeds)g(into)g(attrib)n(utes\).)-2
+3144 y Fp(3.4.5.)35 b(The)f(representation)h(of)e(pr)n(ocessing)h
+(instructions)396 3312 y Fv(Processing)20 b(instructions)f(are)h
+(parsed)g(to)g(some)g(e)o(xtent:)f(The)h(\002rst)h(w)o(ord)f(of)g(the)g
+(PI)g(is)i(called)e(the)g(tar)o(get,)f(and)g(it)i(is)396
+3420 y(stored)f(separated)f(from)g(the)i(rest)f(of)g(the)g(PI:)396
+3600 y Fq()396 3791 y Fv(The)20 b(e)o(xact)g
+(location)f(where)h(a)g(PI)h(occurs)e(is)i(not)f(represented)f(\(by)g
+(def)o(ault\).)g(The)h(parser)f(puts)i(the)f(PI)g(into)g(the)396
+3899 y(object)g(that)g(represents)g(the)g(embracing)e(construct)h(\(an)
+h(element,)f(a)i(DTD,)f(or)g(the)g(whole)g(document\);)e(that)i(means)
+396 4007 y(you)g(can)g(\002nd)f(out)h(which)g(PIs)h(occur)e(in)h(a)h
+(certain)f(element,)f(in)h(the)h(DTD,)f(or)g(in)g(the)g(whole)g
+(document,)e(b)n(ut)i(you)396 4114 y(cannot)f(lookup)g(the)h(e)o(xact)g
+(position)f(within)h(the)g(construct.)396 4264 y(If)g(you)g(require)e
+(the)j(e)o(xact)e(location)h(of)g(PIs,)g(it)h(is)g(possible)f(to)g
+(create)g(e)o(xtra)g(nodes)f(for)h(them.)f(This)i(mode)e(is)396
+4372 y(controled)g(by)g(the)i(option)e Fq(enable_pinstr_nodes)p
+Fv(.)e(The)j(additional)f(nodes)g(ha)n(v)o(e)h(the)g(node)f(type)h
+Fq(T_pinstr)396 4480 y Fn(target)p Fv(,)g(and)f(are)i(created)e(from)g
+(special)h(e)o(x)o(emplars)f(contained)f(in)j(the)f Fq(spec)g
+Fv(\(see)g(pxp_document.mli\).)p Black 3800 5278 a Fr(71)p
+Black eop
+%%Page: 72 72
+72 71 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black -2 583 a Fp(3.4.6.)35
+b(The)f(representation)h(of)e(comments)396 751 y Fv(Normally)-5
+b(,)19 b(comments)g(are)h(not)g(represented;)e(the)o(y)i(are)g(dropped)
+e(by)h(def)o(ault.)h(Ho)n(we)n(v)o(er)m(,)e(if)i(you)f(require)g(them,)
+h(it)h(is)396 859 y(possible)f(to)h(create)e Fq(T_comment)h
+Fv(nodes)f(for)h(them.)f(This)i(mode)e(can)h(be)g(speci\002ed)g(by)g
+(the)g(option)396 967 y Fq(enable_comment_nodes)p Fv(.)d(Comment)j
+(nodes)f(are)h(created)g(from)f(special)h(e)o(x)o(emplars)f(contained)f
+(in)j(the)f Fq(spec)396 1075 y Fv(\(see)h(pxp_document.mli\).)15
+b(Y)-9 b(ou)19 b(can)h(access)h(the)f(contents)g(of)g(comments)f
+(through)f(the)i(method)f Fq(comment)p Fv(.)-2 1444 y
+Fp(3.4.7.)35 b(The)f(attrib)n(utes)f Fc(xml:lang)d Fp(and)k
+Fc(xml:space)396 1612 y Fv(These)20 b(attrib)n(utes)g(are)g(not)g
+(supported)f(specially;)h(the)o(y)f(are)h(handled)f(lik)o(e)h(an)o(y)g
+(other)f(attrib)n(ute.)-2 1982 y Fp(3.4.8.)35 b(And)f(what)f(about)h
+(namespaces?)396 2149 y Fv(Currently)-5 b(,)19 b(there)g(is)i(no)f
+(special)h(support)d(for)i(namespaces.)f(Ho)n(we)n(v)o(er)m(,)f(the)i
+(parser)g(allo)n(ws)g(it)h(that)f(the)h(colon)e(occurs)396
+2257 y(in)i(names)e(such)h(that)h(it)g(is)g(possible)f(to)g(implement)f
+(namespaces)g(on)h(top)g(of)g(the)g(current)f(API.)396
+2407 y(Some)h(future)f(release)h(of)g(PXP)h(will)g(support)e
+(namespaces)g(as)i(b)n(uilt-in)f(feature...)p Black 3800
+5278 a Fr(72)p Black eop
+%%Page: 73 73
+73 72 bop Black Black -2 621 a Fs(Chapter)48 b(4.)f(Con\002guring)j
+(and)e(calling)f(the)h(par)m(ser)-2 1055 y Fx(4.1.)39
+b(Over)q(vie)n(w)396 1235 y Fv(There)20 b(are)g(the)g(follo)n(wing)f
+(main)g(functions)g(in)m(v)n(oking)f(the)i(parser)g(\(in)g(Pxp_yacc\):)
+p Black 396 1558 a Ft(\225)p Black 60 w Fr(par)o(se_document_entity:)d
+Fv(Y)-9 b(ou)19 b(w)o(ant)i(to)f(parse)g(a)g(complete)g(and)f(closed)h
+(document)e(consisting)i(of)g(a)g(DTD)h(and)479 1666
+y(the)f(document)f(body;)g(the)h(body)f(is)i(v)n(alidated)e(against)g
+(the)h(DTD.)h(This)f(mode)f(is)i(interesting)f(if)g(you)f(ha)n(v)o(e)h
+(a)h(\002le)479 1835 y Fq()f
+()g(...)h( )396 1984 y Fv(and)20 b(you)f(can)h(accept)g(an)
+o(y)f(DTD)i(that)f(is)h(included)e(in)h(the)g(\002le)h(\(e.g.)f
+(because)f(the)h(\002le)h(is)g(under)e(your)g(control\).)p
+Black 396 2092 a Ft(\225)p Black 60 w Fr(par)o(se_wfdocument_entity:)e
+Fv(Y)-9 b(ou)20 b(w)o(ant)g(to)g(parse)g(a)h(complete)e(and)h(closed)f
+(document)g(consisting)g(of)h(a)h(DTD)479 2200 y(and)f(the)g(document)e
+(body;)h(b)n(ut)h(the)h(body)d(is)k(not)d(v)n(alidated,)g(only)h(check)
+o(ed)e(for)i(well-formedness.)e(This)i(mode)f(is)479
+2308 y(preferred)f(if)j(v)n(alidation)d(costs)j(too)f(much)f(time)i(or)
+f(if)g(the)g(DTD)h(is)g(missing.)p Black 396 2416 a Ft(\225)p
+Black 60 w Fr(par)o(se_dtd_entity:)d Fv(Y)-9 b(ou)20
+b(w)o(ant)g(only)f(to)i(parse)e(an)i(entity)e(\(\002le\))i(containing)d
+(the)i(e)o(xternal)f(subset)h(of)g(a)h(DTD.)479 2524
+y(Sometimes)f(it)h(is)g(interesting)e(to)i(read)e(such)h(a)h(DTD,)f
+(for)g(e)o(xample)e(to)j(compare)d(it)j(with)g(the)f(DTD)g(included)f
+(in)h(a)479 2632 y(document,)e(or)i(to)g(apply)g(the)g(ne)o(xt)f(mode:)
+p Black 396 2740 a Ft(\225)p Black 60 w Fr(par)o(se_content_entity:)e
+Fv(Y)-9 b(ou)20 b(w)o(ant)g(only)g(to)g(parse)g(an)g(entity)g
+(\(\002le\))g(containing)e(a)j(fragment)d(of)i(a)h(document)479
+2848 y(body;)e(this)i(fragment)d(is)j(v)n(alidated)f(against)f(the)h
+(DTD)h(you)e(pass)i(to)f(the)g(function.)e(Especially)-5
+b(,)19 b(the)i(fragment)479 2956 y(must)g(not)e(ha)n(v)o(e)h(a)65
+b Fo(<)p Fq(!DOCTYPE)p Fo(>)19 b Fv(clause,)h(and)g(must)g(directly)g
+(be)o(gin)f(with)h(an)g(element.)f(The)h(element)g(is)479
+3064 y(v)n(alidated)f(against)h(the)g(DTD.)g(This)h(mode)e(is)i
+(interesting)e(if)i(you)e(w)o(ant)h(to)h(check)e(documents)f(against)i
+(a)h(\002x)o(ed,)479 3172 y(immutable)e(DTD.)p Black
+396 3280 a Ft(\225)p Black 60 w Fr(par)o(se_wfcontent_entity:)f
+Fv(This)i(function)f(also)h(parses)g(a)h(single)f(element)g(without)f
+(DTD,)h(b)n(ut)g(does)g(not)g(v)n(alidate)479 3388 y(it.)p
+Black 396 3495 a Ft(\225)p Black 60 w Fr(e)n(xtr)o(act_dtd_fr)l
+(om_document_entity:)15 b Fv(This)20 b(function)f(e)o(xtracts)g(the)i
+(DTD)f(from)f(a)i(closed)f(document)479 3603 y(consisting)g(of)g(a)g
+(DTD)h(and)e(a)i(document)d(body)-5 b(.)18 b(Both)j(the)f(internal)f
+(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted.)396
+3794 y(In)f(man)o(y)f(cases,)i Fq(parse_document_entity)c
+Fv(is)k(the)f(preferred)e(mode)i(to)g(parse)g(a)g(document)f(in)h(a)h
+(v)n(alidating)396 3902 y(w)o(ay)-5 b(,)20 b(and)g Fq
+(parse_wfdocument_entity)c Fv(is)22 b(the)e(mode)f(of)h(choice)f(to)i
+(parse)f(a)g(\002le)h(while)f(only)g(checking)e(for)396
+4010 y(well-formedness.)396 4160 y(There)i(are)g(a)g(number)f(of)h(v)n
+(ariations)f(of)h(these)g(modes.)f(One)h(important)f(application)g(of)h
+(a)g(parser)g(is)h(to)f(check)396 4268 y(documents)f(of)h(an)g
+(untrusted)f(source)g(against)h(a)g(\002x)o(ed)g(DTD.)g(One)g(solution)
+f(is)i(to)g(not)f(allo)n(w)g(the)g Fo(<)p Fq(!DOCTYPE)p
+Fo(>)396 4375 y Fv(clause)g(in)h(these)f(documents,)e(and)i(treat)g
+(the)h(document)d(lik)o(e)i(a)h(fragment)d(\(using)i(mode)f
+Fr(par)o(se_content_entity)p Fv(\).)396 4483 y(This)i(is)g(v)o(ery)e
+(simple,)h(b)n(ut)g(in\003e)o(xible;)f(users)i(of)e(such)h(a)h(system)f
+(cannot)f(e)n(v)o(en)h(de\002ne)f(additional)g(entities)i(to)396
+4591 y(abbre)n(viate)e(frequent)f(phrases)i(of)g(their)g(te)o(xt.)396
+4741 y(It)h(may)e(be)i(necessary)e(to)h(ha)n(v)o(e)g(a)h(more)e
+(intelligent)g(check)o(er)-5 b(.)20 b(F)o(or)g(e)o(xample,)e(it)j(is)g
+(also)g(possible)e(to)i(parse)f(the)396 4849 y(document)e(to)j(check)e
+(fully)-5 b(,)19 b(i.e.)h(with)h(DTD,)f(and)f(to)i(compare)d(this)j
+(DTD)f(with)h(the)f(prescribed)f(one.)g(In)h(order)f(to)p
+Black 3800 5278 a Fr(73)p Black eop
+%%Page: 74 74
+74 73 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(fully)g(parse)g(the)g
+(document,)e(mode)h Fr(par)o(se_document_entity)e Fv(is)k(applied,)e
+(and)h(to)g(get)g(the)g(DTD)h(to)f(compare)f(with)396
+687 y(mode)g Fr(par)o(se_dtd_entity)f Fv(can)i(be)h(used.)396
+836 y(There)f(is)h(another)d(v)o(ery)i(important)e(con\002gurable)g
+(aspect)i(of)g(the)g(parser:)g(the)g(so-called)g(resolv)o(er)-5
+b(.)19 b(The)h(task)g(of)g(the)396 944 y(resolv)o(er)f(is)i(to)g
+(locate)f(the)g(contents)f(of)h(an)g(\(e)o(xternal\))f(entity)g(for)h
+(a)h(gi)n(v)o(en)e(entity)g(name,)h(and)f(to)i(mak)o(e)e(the)i
+(contents)396 1052 y(accessible)g(as)f(a)h(character)e(stream.)h
+(\(Furthermore,)d(it)k(also)f(normalizes)g(the)g(character)f(set;)i(b)n
+(ut)f(this)h(is)g(a)f(detail)h(we)396 1160 y(can)f(ignore)f(here.\))g
+(Consider)h(you)f(ha)n(v)o(e)h(a)g(\002le)h(called)f
+Fq("main.xml")f Fv(containing)396 1340 y Fq()396 1437 y(\045sub;)396
+1628 y Fv(and)20 b(a)h(\002le)f(stored)g(in)g(the)h(subdirectory)c
+Fq("sub")j Fv(with)h(name)e Fq("sub.xml")g Fv(containing)396
+1808 y Fq()
+396 1906 y(\045subsub;)396 2097 y Fv(and)20 b(a)g(\002le)h(stored)e(in)
+h(the)g(subdirectory)d Fq("subsub")j Fv(of)f Fq("sub")h
+Fv(with)g(name)f Fq("subsub.xml")g Fv(\(the)g(contents)h(of)f(this)396
+2204 y(\002le)i(do)f(not)g(matter\).)f(Here,)h(the)g(resolv)o(er)f
+(must)h(track)g(that)g(the)g(second)g(entity)g Fq(subsub)f
+Fv(is)i(located)f(in)g(the)h(directory)396 2312 y Fq("sub/subsub")p
+Fv(,)e(i.e.)h(the)g(dif)n(\002culty)f(is)i(to)g(interpret)e(the)h
+(system)g(\(\002le\))h(names)e(of)h(entities)h(relati)n(v)o(e)e(to)i
+(the)f(entities)396 2420 y(containing)f(them,)g(e)n(v)o(en)g(if)i(the)f
+(entities)h(are)f(deeply)f(nested.)396 2570 y(There)h(is)h(not)f(a)g
+(\002x)o(ed)g(resolv)o(er)f(already)g(doing)g(e)n(v)o(erything)e(right)
+j(-)g(resolving)f(entity)h(names)g(is)h(a)f(task)h(that)f(highly)396
+2678 y(depends)f(on)h(the)g(en)m(vironment.)d(The)j(XML)g
+(speci\002cation)f(only)h(demands)f(that)h Fq(SYSTEM)g
+Fv(entities)g(are)g(interpreted)396 2786 y(lik)o(e)h(URLs)g(\(which)e
+(is)i(not)f(v)o(ery)f(precise,)h(as)h(there)e(are)i(lots)f(of)g(URL)h
+(schemes)f(in)g(use\),)g(hoping)f(that)h(this)h(helps)396
+2894 y(o)o(v)o(ercoming)c(the)j(local)g(peculiarities)g(of)g(the)g(en)m
+(vironment;)d(the)k(idea)f(is)h(that)f(if)h(you)e(do)h(not)f(kno)n(w)h
+(your)396 3001 y(en)m(vironment)d(you)j(can)g(refer)f(to)h(other)g
+(entities)g(by)g(denoting)e(URLs)k(for)d(them.)h(I)g(think)g(that)g
+(this)h(interpretation)d(of)396 3109 y Fq(SYSTEM)i Fv(names)g(may)g(ha)
+n(v)o(e)f(some)h(applications)f(in)i(the)f(internet,)f(b)n(ut)h(it)h
+(is)g(not)f(the)g(\002rst)h(choice)f(in)g(general.)396
+3217 y(Because)h(of)f(this,)g(the)g(resolv)o(er)f(is)i(a)g(separate)f
+(module)e(of)i(the)h(parser)e(that)h(can)g(be)h(e)o(xchanged)c(by)j
+(another)f(one)g(if)396 3325 y(necessary;)h(more)f(precisely)-5
+b(,)19 b(the)h(parser)g(already)f(de\002nes)h(se)n(v)o(eral)f(resolv)o
+(ers.)396 3475 y(The)h(follo)n(wing)f(resolv)o(ers)g(do)h(already)f(e)o
+(xist:)p Black 396 3707 a Ft(\225)p Black 60 w Fv(Resolv)o(ers)h
+(reading)f(from)g(arbitrary)g(input)g(channels.)g(These)h(can)g(be)g
+(con\002gured)e(such)i(that)g(a)h(certain)f(ID)g(is)479
+3815 y(associated)g(with)h(the)f(channel;)f(in)h(this)h(case)g(inner)e
+(references)g(to)h(e)o(xternal)f(entities)i(can)f(be)g(resolv)o(ed.)e
+(There)i(is)479 3923 y(also)h(a)f(special)h(resolv)o(er)e(that)h
+(interprets)f(SYSTEM)i(IDs)f(as)h(URLs;)g(this)g(resolv)o(er)e(can)h
+(process)g(relati)n(v)o(e)479 4031 y(SYSTEM)h(names)e(and)h(determine)f
+(the)h(corresponding)d(absolute)i(URL.)p Black 396 4139
+a Ft(\225)p Black 60 w Fv(A)i(resolv)o(er)e(that)h(reads)g(al)o(w)o
+(ays)h(from)e(a)i(gi)n(v)o(en)d(O'Caml)j(string.)e(This)i(resolv)o(er)e
+(is)i(not)f(able)g(to)g(resolv)o(e)f(further)479 4247
+y(names)h(unless)g(the)h(string)f(is)h(not)f(associated)g(with)g(an)o
+(y)f(name,)h(i.e.)g(if)g(the)g(document)f(contained)f(in)j(the)f
+(string)479 4355 y(refers)g(to)g(an)g(e)o(xternal)f(entity)-5
+b(,)20 b(this)g(reference)f(cannot)g(be)h(follo)n(wed)f(in)h(this)h
+(case.)p Black 396 4463 a Ft(\225)p Black 60 w Fv(A)g(resolv)o(er)e
+(for)g(\002le)i(names.)f(The)g Fq(SYSTEM)g Fv(name)f(is)i(interpreted)e
+(as)i(\002le)f(URL)h(with)g(the)f(slash)h("/")f(as)h(separator)479
+4571 y(for)f(directories.)f(-)h(This)h(resolv)o(er)d(is)k(deri)n(v)o
+(ed)c(from)h(the)h(generic)f(URL)i(resolv)o(er)-5 b(.)396
+4720 y(The)20 b(interf)o(ace)f(a)i(resolv)o(er)e(must)h(ha)n(v)o(e)g
+(is)h(documented,)c(so)k(it)g(is)g(possible)f(to)g(write)g(your)f(o)n
+(wn)h(resolv)o(er)-5 b(.)19 b(F)o(or)396 4828 y(e)o(xample,)g(you)g
+(could)g(connect)g(the)h(parser)g(with)g(an)h(HTTP)f(client,)g(and)f
+(resolv)o(e)h(URLs)h(of)f(the)g(HTTP)g(namespace.)p Black
+3800 5278 a Fr(74)p Black eop
+%%Page: 75 75
+75 74 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(resolv)o(er)f
+(classes)i(support)e(that)h(se)n(v)o(eral)g(independent)e(resolv)o(ers)
+h(are)h(combined)e(to)i(one)g(more)f(po)n(werful)396
+687 y(resolv)o(er;)g(thus)h(it)h(is)g(possible)f(to)h(combine)d(a)j
+(self-written)e(resolv)o(er)g(with)i(the)f(already)f(e)o(xisting)g
+(resolv)o(ers.)396 836 y(Note)h(that)h(the)f(e)o(xisting)f(resolv)o
+(ers)h(only)f(interpret)g Fq(SYSTEM)h Fv(names,)f(not)h
+Fq(PUBLIC)g Fv(names.)g(If)g(it)h(helps)f(you,)f(it)h(is)396
+944 y(possible)g(to)f(de\002ne)h(resolv)o(ers)e(for)h
+Fq(PUBLIC)h Fv(names,)f(too;)g(for)g(e)o(xample,)f(such)i(a)g(resolv)o
+(er)e(could)h(look)g(up)g(the)h(public)396 1052 y(name)g(in)g(a)h(hash)
+f(table,)g(and)f(map)h(it)h(to)f(a)h(system)f(name)g(which)g(is)h
+(passed)f(o)o(v)o(er)f(to)h(the)g(e)o(xisting)g(resolv)o(er)e(for)396
+1160 y(system)j(names.)e(It)i(is)g(relati)n(v)o(ely)e(simple)h(to)g
+(pro)o(vide)f(such)g(a)i(resolv)o(er)-5 b(.)-2 1579 y
+Fx(4.2.)39 b(Resolver)n(s)e(and)i(sour)m(ces)-2 1907
+y Fp(4.2.1.)c(Using)f(the)g(b)n(uilt-in)f(resolver)n(s)i(\(called)g
+(sour)n(ces\))396 2075 y Fv(The)20 b(type)g Fq(source)g
+Fv(enumerates)e(the)j(tw)o(o)f(possibilities)h(where)e(the)h(document)f
+(to)h(parse)g(comes)g(from.)396 2255 y Fq(type)44 b(source)g(=)576
+2352 y(Entity)f(of)i(\(\(dtd)f(-)p Fo(>)g Fq(Pxp_entity.entity\))e(*)j
+(Pxp_reader.resolver\))486 2449 y(|)g(ExtID)f(of)g(\(ext_id)g(*)g
+(Pxp_reader.resolver\))396 2640 y Fv(Y)-9 b(ou)20 b(normally)e(need)i
+(not)g(to)g(w)o(orry)f(about)h(this)g(type)g(as)h(there)f(are)g(con)m
+(v)o(enience)d(functions)i(that)h(create)g Fq(source)396
+2748 y Fv(v)n(alues:)p Black 396 3105 a Ft(\225)p Black
+60 w Fq(from_file)44 b(s)p Fv(:)20 b(The)g(document)e(is)j(read)f(from)
+f(\002le)i Fq(s)p Fv(;)g(you)e(may)h(specify)f(absolute)h(or)g(relati)n
+(v)o(e)f(path)h(names.)479 3213 y(The)g(\002le)h(name)f(must)g(be)g
+(encoded)e(as)j(UTF-8)f(string.)479 3362 y(There)g(is)h(an)f(optional)f
+(ar)o(gument)f Fq(~system_encoding)g Fv(specifying)g(the)j(character)d
+(encoding)h(which)g(is)i(used)479 3470 y(for)f(the)g(names)g(of)g(the)g
+(\002le)h(system.)f(F)o(or)g(e)o(xample,)e(if)j(this)g(encoding)d(is)j
+(ISO-8859-1)c(and)j Fq(s)g Fv(is)i(also)e(a)479 3578
+y(ISO-8859-1)e(string,)h(you)h(can)g(form)f(the)h(source:)479
+3717 y Fq(let)45 b(s_utf8)88 b(=)i(recode_string)42 b
+(~in_enc:`Enc_iso88591)g(~out_enc:`Enc_utf8)g(s)i(in)479
+3814 y(from_file)g(~system_encoding:`Enc_iso88591)39
+b(s_utf8)479 4005 y Fv(This)21 b Fq(source)e Fv(has)i(the)f(adv)n
+(antage)e(that)j(it)f(is)i(able)e(to)g(resolv)o(e)f(inner)h(e)o
+(xternal)f(entities;)h(i.e.)g(if)h(your)e(document)479
+4113 y(includes)g(data)g(from)g(another)f(\002le)i(\(using)f(the)g
+Fq(SYSTEM)g Fv(attrib)n(ute\),)g(this)g(mode)g(will)h(\002nd)f(that)h
+(\002le.)g(Ho)n(we)n(v)o(er)m(,)d(this)479 4221 y(mode)j(cannot)f
+(resolv)o(e)g Fq(PUBLIC)h Fv(identi\002ers)f(nor)h Fq(SYSTEM)g
+Fv(identi\002ers)f(other)h(than)g("\002le:".)p Black
+396 4370 a Ft(\225)p Black 60 w Fq(from_channel)43 b(ch)p
+Fv(:)21 b(The)e(document)g(is)i(read)e(from)h(the)g(channel)f
+Fq(ch)p Fv(.)h(In)g(general,)f(this)h(source)g(also)g(supports)479
+4478 y(\002le)h(URLs)g(found)e(in)h(the)g(document;)f(ho)n(we)n(v)o(er)
+m(,)e(by)j(def)o(ault)f(only)h(absolute)f(URLs)i(are)f(understood.)e
+(It)i(is)479 4586 y(possible)g(to)h(associate)f(an)g(ID)g(with)h(the)f
+(channel)f(such)h(that)g(the)g(resolv)o(er)f(kno)n(ws)h(ho)n(w)f(to)i
+(interpret)e(relati)n(v)o(e)479 4694 y(URLs:)479 4832
+y Fq(from_channel)43 b(~id:\(System)g("file:///dir/dir1/"\))f(ch)p
+Black 3800 5278 a Fr(75)p Black eop
+%%Page: 76 76
+76 75 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(There)g(is)h(also)f
+(the)g(~system_encoding)e(ar)o(gument)f(specifying)i(ho)n(w)h(\002le)h
+(names)e(are)i(encoded.)d(-)i(The)g(e)o(xample)479 687
+y(from)f(abo)o(v)o(e)g(can)h(also)g(be)h(written)f(\(b)n(ut)f(it)i(is)g
+(no)f(longer)f(possible)h(to)g(interpret)f(relati)n(v)o(e)h(URLs)h
+(because)e(there)h(is)479 795 y(no)g(~id)g(ar)o(gument,)e(and)i
+(computing)d(this)k(ar)o(gument)d(is)j(relati)n(v)o(ely)e(complicated)g
+(because)g(it)i(must)f(be)h(a)f(v)n(alid)479 903 y(URL\):)479
+1041 y Fq(let)45 b(ch)f(=)h(open_in)e(s)i(in)479 1138
+y(let)g(src)f(=)h(from_channel)d(~system_encoding:`Enc_iso88591)e(ch)45
+b(in)479 1236 y(...;)479 1333 y(close_in)f(ch)p Black
+396 1482 a Ft(\225)p Black 60 w Fq(from_string)f(s)p
+Fv(:)21 b(The)f(string)g Fq(s)g Fv(is)h(the)g(document)d(to)i(parse.)g
+(This)g(mode)f(is)j(not)d(able)h(to)h(interpret)e(\002le)i(names)479
+1590 y(of)f Fq(SYSTEM)g Fv(clauses,)g(nor)g(it)h(can)f(look)f(up)h
+Fq(PUBLIC)f Fv(identi\002ers.)479 1740 y(Normally)-5
+b(,)19 b(the)h(encoding)e(of)i(the)g(string)g(is)h(detected)e(as)i
+(usual)f(by)g(analyzing)f(the)h(XML)g(declaration,)e(if)j(an)o(y)-5
+b(.)479 1847 y(Ho)n(we)n(v)o(er)m(,)18 b(it)j(is)g(also)g(possible)f
+(to)g(specify)g(the)g(encoding)e(directly:)479 1986 y
+Fq(let)45 b(src)f(=)h(from_string)e(~fixenc:`ISO-8859-2)e(s)p
+Black 396 2177 a Ft(\225)p Black 60 w Fq(ExtID)j(\(id,)g(r\))p
+Fv(:)21 b(The)f(document)e(to)i(parse)g(is)h(denoted)e(by)h(the)g
+(identi\002er)g Fq(id)g Fv(\(either)f(a)i Fq(SYSTEM)f
+Fv(or)g Fq(PUBLIC)479 2285 y Fv(clause\),)g(and)g(this)g(identi\002er)g
+(is)h(interpreted)d(by)i(the)g(resolv)o(er)f Fq(r)p Fv(.)i(Use)f(this)h
+(mode)e(if)i(you)e(ha)n(v)o(e)h(written)g(your)f(o)n(wn)479
+2393 y(resolv)o(er)-5 b(.)479 2542 y(Which)20 b(character)f(sets)j(are)
+e(possible)g(depends)e(on)i(the)g(passed)h(resolv)o(er)d
+Fq(r)p Fv(.)p Black 396 2692 a Ft(\225)p Black 60 w Fq(Entity)44
+b(\(get_entity,)f(r\))p Fv(:)20 b(The)g(document)e(to)j(parse)f(is)h
+(returned)d(by)i(the)g(function)f(in)m(v)n(ocation)479
+2800 y Fq(get_entity)43 b(dtd)p Fv(,)20 b(where)g Fq(dtd)g
+Fv(is)h(the)g(DTD)f(object)g(to)g(use)g(\(it)h(may)f(be)g(empty\).)f
+(Inner)f(e)o(xternal)h(references)479 2908 y(occuring)g(in)h(this)h
+(entity)e(are)i(resolv)o(ed)d(using)i(the)g(resolv)o(er)f
+Fq(r)p Fv(.)479 3057 y(Which)h(character)f(sets)j(are)e(possible)g
+(depends)e(on)i(the)g(passed)h(resolv)o(er)d Fq(r)p Fv(.)-2
+3510 y Fp(4.2.2.)35 b(The)f(resolver)g(API)396 3677 y
+Fv(A)21 b(resolv)o(er)e(is)i(an)f(object)g(that)g(can)g(be)g(opened)e
+(lik)o(e)j(a)f(\002le,)h(b)n(ut)f(you)f(do)h(not)g(pass)g(the)h(\002le)
+f(name)g(to)g(the)g(resolv)o(er)m(,)f(b)n(ut)396 3785
+y(the)h(XML)h(identi\002er)e(of)h(the)g(entity)g(to)h(read)e(from)g
+(\(either)h(a)g Fq(SYSTEM)g Fv(or)g Fq(PUBLIC)g Fv(clause\).)f(When)h
+(opened,)f(the)396 3893 y(resolv)o(er)g(must)h(return)f(the)i
+Fq(Lexing.lexbuf)d Fv(that)i(reads)g(the)h(characters.)e(The)g(resolv)o
+(er)g(can)h(be)h(closed,)e(and)h(it)396 4001 y(can)g(be)g(cloned.)f
+(Furthermore,)f(it)j(is)g(possible)f(to)g(tell)h(the)f(resolv)o(er)f
+(which)h(character)f(set)i(it)g(should)e(assume.)h(-)g(The)396
+4109 y(follo)n(wing)f(from)g(Pxp_reader:)396 4289 y Fq(exception)44
+b(Not_competent)396 4386 y(exception)g(Not_resolvable)e(of)j(exn)396
+4581 y(class)f(type)g(resolver)g(=)486 4678 y(object)576
+4775 y(method)f(init_rep_encoding)f(:)j(rep_encoding)e(->)h(unit)576
+4872 y(method)f(init_warner)g(:)i(collect_warnings)d(->)j(unit)p
+Black 3798 5278 a Fr(76)p Black eop
+%%Page: 77 77
+77 76 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(rep_encoding)g(:)i(rep_encoding)576 676 y(method)e(open_in)h(:)h
+(ext_id)f(->)g(Lexing.lexbuf)576 773 y(method)f(close_in)h(:)h(unit)576
+870 y(method)e(change_encoding)g(:)h(string)g(->)h(unit)576
+967 y(method)e(clone)h(:)h(resolver)576 1065 y(method)e(close_all)h(:)g
+(unit)486 1162 y(end)396 1353 y Fv(The)20 b(resolv)o(er)f(object)h
+(must)g(w)o(ork)f(as)i(follo)n(ws:)p Black 396 1627 a
+Ft(\225)p Black 60 w Fv(When)f(the)h(parser)e(is)i(called,)f(it)h
+(tells)g(the)f(resolv)o(er)f(the)h(w)o(arner)g(object)f(and)h(the)g
+(internal)g(encoding)e(by)i(in)m(v)n(oking)479 1735 y
+Fq(init_warner)f Fv(and)h Fq(init_rep_encoding)p Fv(.)d(The)j(resolv)o
+(er)f(should)g(store)i(these)f(v)n(alues.)f(The)h(method)479
+1843 y Fq(rep_encoding)f Fv(should)g(return)g(the)h(internal)g
+(encoding.)p Black 396 1950 a Ft(\225)p Black 60 w Fv(If)g(the)h
+(parser)e(w)o(ants)i(to)f(read)g(from)f(the)h(resolv)o(er)m(,)e(it)j
+(in)m(v)n(ok)o(es)f(the)g(method)f Fq(open_in)p Fv(.)g(Either)h(the)g
+(resolv)o(er)479 2058 y(succeeds,)g(in)g(which)g(case)g(the)h
+Fq(Lexing.lexbuf)d Fv(reading)h(from)g(the)h(\002le)h(or)f(stream)g
+(must)g(be)h(returned,)d(or)479 2166 y(opening)h(f)o(ails.)h(In)g(the)g
+(latter)h(case)f(the)h(method)d(implementation)g(should)h(raise)i(an)f
+(e)o(xception)e(\(see)j(belo)n(w\).)p Black 396 2274
+a Ft(\225)p Black 60 w Fv(If)f(the)h(parser)e(\002nishes)i(reading,)d
+(it)j(calls)g(the)f Fq(close_in)g Fv(method.)p Black
+396 2382 a Ft(\225)p Black 60 w Fv(If)g(the)h(parser)e(\002nds)h(a)h
+(reference)d(to)j(another)e(e)o(xternal)f(entity)i(in)h(the)f(input)f
+(stream,)h(it)h(calls)g Fq(clone)f Fv(to)g(get)h(a)479
+2490 y(second)f(resolv)o(er)f(which)g(must)h(be)h(initially)f(closed)g
+(\(not)f(yet)h(connected)f(with)h(an)g(input)f(stream\).)h(The)g
+(parser)479 2598 y(then)g(in)m(v)n(ok)o(es)f Fq(open_in)h
+Fv(and)f(the)i(other)e(methods)g(as)i(described.)p Black
+396 2706 a Ft(\225)p Black 60 w Fv(If)f(you)g(already)f(kno)n(w)g(the)h
+(character)f(set)i(of)f(the)g(input)g(stream,)f(you)h(should)f(recode)g
+(it)i(to)f(the)g(internal)479 2814 y(encoding,)e(and)i(de\002ne)f(the)i
+(method)d Fq(change_encoding)h Fv(as)i(an)f(empty)f(method.)p
+Black 396 2922 a Ft(\225)p Black 60 w Fv(If)h(you)g(w)o(ant)g(to)g
+(support)f(multiple)h(e)o(xternal)f(character)g(sets,)i(the)f(object)f
+(must)i(follo)n(w)e(a)i(much)e(more)479 3030 y(complicated)g(protocol.)
+f(Directly)i(after)g Fq(open_in)f Fv(has)i(been)e(called,)h(the)g
+(resolv)o(er)f(must)h(return)f(a)i(le)o(xical)f(b)n(uf)n(fer)479
+3138 y(that)h(only)e(reads)h(one)g(byte)f(at)i(a)g(time.)f(This)g(is)h
+(only)f(possible)f(if)i(you)e(create)h(the)g(le)o(xical)g(b)n(uf)n(fer)
+f(with)479 3246 y Fq(Lexing.from_function)p Fv(;)e(the)j(function)d
+(must)j(then)f(al)o(w)o(ays)h(return)e(1)i(if)f(the)h(EOF)g(is)g(not)f
+(yet)h(reached,)e(and)h(0)479 3354 y(if)i(EOF)f(is)h(reached.)e(If)h
+(the)g(parser)g(has)g(read)g(the)g(\002rst)h(line)f(of)g(the)h
+(document,)c(it)k(will)g(in)m(v)n(ok)o(e)479 3461 y Fq(change_encoding)
+e Fv(to)h(tell)h(the)f(resolv)o(er)f(which)h(character)e(set)j(to)g
+(assume.)f(From)f(this)i(moment,)e(the)h(object)479 3569
+y(can)g(return)f(more)h(than)f(one)h(byte)g(at)g(once.)g(The)g(ar)o
+(gument)d(of)j Fq(change_encoding)f Fv(is)i(either)e(the)i(parameter)d
+(of)479 3677 y(the)i("encoding")e(attrib)n(ute)i(of)g(the)g(XML)h
+(declaration,)d(or)i(the)g(empty)f(string)h(if)h(there)e(is)j(not)d(an)
+o(y)h(XML)479 3785 y(declaration)f(or)h(if)g(the)h(declaration)d(does)i
+(not)g(contain)f(an)h(encoding)e(attrib)n(ute.)479 3935
+y(At)j(the)f(be)o(ginning)e(the)i(resolv)o(er)f(must)h(only)g(return)f
+(one)g(character)g(e)n(v)o(ery)g(time)h(something)f(is)i(read)f(from)f
+(the)479 4043 y(le)o(xical)h(b)n(uf)n(fer)-5 b(.)19 b(The)h(reason)f
+(for)h(this)h(is)g(that)f(you)f(otherwise)h(w)o(ould)f(not)h(e)o
+(xactly)g(kno)n(w)f(at)h(which)g(position)f(in)479 4151
+y(the)h(input)g(stream)g(the)g(character)f(set)i(changes.)479
+4300 y(If)f(you)g(w)o(ant)g(automatic)f(recognition)f(of)i(the)g
+(character)f(set,)i(it)g(is)g(up)f(to)g(the)g(resolv)o(er)f(object)h
+(to)g(implement)f(this.)p Black 396 4449 a Ft(\225)p
+Black 60 w Fv(If)h(an)g(error)g(occurs,)f(the)h(parser)g(calls)g(the)h
+(method)d Fq(close_all)i Fv(for)f(the)h(top-le)n(v)o(el)f(resolv)o(er;)
+g(this)i(method)479 4557 y(should)e(close)i(itself)g(\(if)f(not)g
+(already)f(done\))f(and)i(all)h(clones.)396 4748 y Fu(Exceptions.)f
+Fv(It)h(is)g(possible)f(to)g(chain)g(resolv)o(ers)f(such)h(that)g(when)
+g(the)g(\002rst)h(resolv)o(er)e(is)i(not)f(able)g(to)g(open)f(the)396
+4856 y(entity)-5 b(,)20 b(the)g(other)f(resolv)o(ers)g(of)h(the)g
+(chain)g(are)g(tried)g(in)g(turn.)g(The)g(method)e Fq(open_in)i
+Fv(should)f(raise)i(the)f(e)o(xception)p Black 3797 5278
+a Fr(77)p Black eop
+%%Page: 78 78
+78 77 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fq(Not_competent)f
+Fv(to)h(indicate)g(that)g(the)g(ne)o(xt)g(resolv)o(er)f(should)g(try)h
+(to)g(open)f(the)i(entity)-5 b(.)19 b(If)h(the)g(resolv)o(er)f(is)i
+(able)f(to)396 687 y(handle)f(the)i(ID,)f(b)n(ut)g(some)g(other)f
+(error)g(occurs,)g(the)i(e)o(xception)d Fq(Not_resolvable)g
+Fv(should)i(be)g(raised)g(to)g(force)396 795 y(that)h(the)f(chain)f
+(breaks.)396 944 y(Example:)g(Ho)n(w)h(to)h(de\002ne)e(a)i(resolv)o(er)
+e(that)h(is)h(equi)n(v)n(alent)e(to)h(from_string:)e(...)-2
+1314 y Fp(4.2.3.)35 b(Prede\002ned)f(resolver)h(components)396
+1482 y Fv(There)20 b(are)g(some)g(classes)h(in)f(Pxp_reader)e(that)j
+(de\002ne)e(common)g(resolv)o(er)f(beha)n(viour)-5 b(.)396
+1662 y Fq(class)44 b(resolve_read_this_channel)d(:)576
+1759 y(?id:ext_id)i(->)576 1856 y(?fixenc:encoding)f(->)576
+1953 y(?auto_close:bool)g(->)576 2050 y(in_channel)h(->)755
+2147 y(resolver)396 2338 y Fv(Reads)21 b(from)e(the)h(passed)g(channel)
+f(\(it)i(may)f(be)g(e)n(v)o(en)f(a)i(pipe\).)e(If)h(the)g
+Fq(~id)g Fv(ar)o(gument)e(is)j(passed)f(to)h(the)f(object,)f(the)396
+2446 y(created)h(resolv)o(er)f(accepts)h(only)f(this)i(ID.)f(Otherwise)
+g(all)h(IDs)f(are)g(accepted.)f(-)i(Once)f(the)g(resolv)o(er)f(has)h
+(been)396 2554 y(cloned,)f(it)h(does)g(not)f(accept)h(an)o(y)f(ID.)g
+(This)h(means)g(that)g(this)g(resolv)o(er)e(cannot)h(handle)g(inner)g
+(references)f(to)i(e)o(xternal)396 2662 y(entities.)h(Note)f(that)g
+(you)f(can)h(combine)f(this)i(resolv)o(er)e(with)h(another)f(resolv)o
+(er)g(that)h(can)g(handle)f(inner)g(references)396 2770
+y(\(such)h(as)h(resolv)o(e_as_\002le\);)d(see)j(class)g('combine')d
+(belo)n(w)-5 b(.)19 b(-)h(If)g(you)g(pass)g(the)h Fq(~fixenc)e
+Fv(ar)o(gument,)f(the)i(encoding)396 2878 y(of)g(the)g(channel)f(is)i
+(set)g(to)g(the)f(passed)g(v)n(alue,)f(re)o(gardless)g(of)h(an)o(y)f
+(auto-recognition)e(or)j(an)o(y)f(XML)h(declaration.)f(-)h(If)396
+2986 y Fq(~auto_close)43 b(=)i(true)20 b Fv(\(which)f(is)i(the)g(def)o
+(ault\),)e(the)h(channel)f(is)i(closed)f(after)g(use.)g(If)g
+Fq(~auto_close)43 b(=)396 3094 y(false)p Fv(,)20 b(the)g(channel)f(is)i
+(left)g(open.)396 3315 y Fq(class)44 b(resolve_read_any_channel)d(:)576
+3413 y(?auto_close:bool)h(->)576 3510 y(channel_of_id:\(ext_id)f(->)j
+(\(in_channel)f(*)i(encoding)f(option\)\))f(->)755 3607
+y(resolver)396 3798 y Fv(This)21 b(resolv)o(er)e(calls)h(the)h
+(function)d Fq(~channel_of_id)h Fv(to)h(open)f(a)i(ne)n(w)f(channel)f
+(for)g(the)h(passed)g Fq(ext_id)p Fv(.)g(This)396 3906
+y(function)f(must)h(either)g(return)f(the)h(channel)f(and)h(the)g
+(encoding,)e(or)i(it)g(must)h(f)o(ail)f(with)h(Not_competent.)c(The)396
+4014 y(function)i(must)h(return)f Fq(None)h Fv(as)h(encoding)d(if)j
+(the)f(def)o(ault)f(mechanism)g(to)h(recognize)f(the)h(encoding)e
+(should)h(be)396 4122 y(used.)g(It)i(must)e(return)g
+Fq(Some)44 b(e)20 b Fv(if)g(it)h(is)f(already)f(kno)n(wn)f(that)i(the)g
+(encoding)d(of)j(the)f(channel)g(is)i Fq(e)p Fv(.)e(If)h
+Fq(~auto_close)396 4230 y(=)45 b(true)19 b Fv(\(which)g(is)h(the)f(def)
+o(ault\),)f(the)i(channel)e(is)i(closed)f(after)g(use.)h(If)f
+Fq(~auto_close)43 b(=)h(false)p Fv(,)19 b(the)h(channel)e(is)396
+4337 y(left)j(open.)396 4559 y Fq(class)44 b(resolve_read_url_channel)d
+(:)576 4656 y(?base_url:Neturl.url)g(->)576 4753 y(?auto_close:bool)h
+(->)576 4851 y(url_of_id:\(ext_id)g(->)i(Neturl.url\))f(->)p
+Black 3800 5278 a Fr(78)p Black eop
+%%Page: 79 79
+79 78 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq
+(channel_of_url:\(Neturl.url)40 b(->)45 b(\(in_channel)e(*)h(encoding)g
+(option\)\))f(->)755 676 y(resolver)396 867 y Fv(When)20
+b(this)h(resolv)o(er)e(gets)h(an)h(ID)f(to)g(read)g(from,)f(it)i(calls)
+g(the)f(function)e Fq(~url_of_id)h Fv(to)i(get)f(the)g(corresponding)
+396 975 y(URL.)h(This)f(URL)h(may)f(be)g(a)g(relati)n(v)o(e)g(URL;)h
+(ho)n(we)n(v)o(er)m(,)c(a)k(URL)g(scheme)f(must)g(be)g(used)g(which)f
+(contains)h(a)h(path.)396 1083 y(The)f(resolv)o(er)f(con)m(v)o(erts)g
+(the)h(URL)h(to)f(an)g(absolute)f(URL)i(if)g(necessary)-5
+b(.)19 b(The)g(second)h(function,)396 1191 y Fq(~channel_of_url)p
+Fv(,)e(is)j(fed)f(with)h(the)f(absolute)f(URL)i(as)g(input.)e(This)h
+(function)f(opens)g(the)i(resource)e(to)h(read)396 1299
+y(from,)f(and)h(returns)f(the)h(channel)f(and)h(the)g(encoding)e(of)i
+(the)g(resource.)396 1448 y(Both)g(functions,)f Fq(~url_of_id)g
+Fv(and)h Fq(~channel_of_url)p Fv(,)e(can)i(raise)g(Not_competent)e(to)i
+(indicate)g(that)g(the)396 1556 y(object)g(is)h(not)f(able)g(to)g(read)
+g(from)f(the)h(speci\002ed)g(resource.)f(Ho)n(we)n(v)o(er)m(,)f(there)i
+(is)h(a)f(dif)n(ference:)f(A)h(Not_competent)396 1664
+y(from)f Fq(~url_of_id)g Fv(is)j(left)e(as)h(it)g(is,)g(b)n(ut)f(a)h
+(Not_competent)c(from)i Fq(~channel_of_url)g Fv(is)i(con)m(v)o(erted)c
+(to)396 1772 y(Not_resolv)n(able.)h(So)i(only)g Fq(~url_of_id)f
+Fv(decides)h(which)f(URLs)i(are)f(accepted)g(by)f(the)i(resolv)o(er)e
+(and)g(which)h(not.)396 1921 y(The)g(function)f Fq(~channel_of_url)f
+Fv(must)i(return)f Fq(None)h Fv(as)h(encoding)d(if)j(the)f(def)o(ault)f
+(mechanism)g(to)i(recognize)396 2029 y(the)f(encoding)f(should)g(be)h
+(used.)g(It)g(must)g(return)f Fq(Some)44 b(e)21 b Fv(if)g(it)f(is)i
+(already)d(kno)n(wn)f(that)j(the)f(encoding)e(of)i(the)396
+2137 y(channel)f(is)i Fq(e)p Fv(.)396 2287 y(If)f Fq(~auto_close)43
+b(=)i(true)20 b Fv(\(which)f(is)i(the)g(def)o(ault\),)e(the)h(channel)f
+(is)i(closed)f(after)g(use.)g(If)g Fq(~auto_close)43
+b(=)396 2395 y(false)p Fv(,)20 b(the)g(channel)f(is)i(left)g(open.)396
+2544 y(Objects)f(of)g(this)g(class)h(contain)e(a)h(base)g(URL)g(relati)
+n(v)o(e)f(to)h(which)g(relati)n(v)o(e)f(URLs)h(are)g(interpreted.)e
+(When)i(creating)e(a)396 2652 y(ne)n(w)i(object,)g(you)f(can)h(specify)
+f(the)i(base)f(URL)h(by)f(passing)f(it)i(as)g Fq(~base_url)e
+Fv(ar)o(gument.)f(When)i(an)g(e)o(xisting)396 2760 y(object)g(is)h
+(cloned,)e(the)h(base)g(URL)h(of)f(the)g(clone)g(is)h(the)f(URL)h(of)f
+(the)g(original)f(object.)h(-)g(Note)g(that)g(the)h(term)f("base)396
+2868 y(URL")h(has)f(a)h(strict)g(de\002nition)e(in)h(RFC)i(1808.)396
+3089 y Fq(class)44 b(resolve_read_this_string)d(:)576
+3187 y(?id:ext_id)i(->)576 3284 y(?fixenc:encoding)f(->)576
+3381 y(string)h(->)755 3478 y(resolver)396 3669 y Fv(Reads)21
+b(from)e(the)h(passed)g(string.)g(If)g(the)g Fq(~id)h
+Fv(ar)o(gument)c(is)k(passed)g(to)f(the)g(object,)g(the)g(created)f
+(resolv)o(er)g(accepts)396 3777 y(only)h(this)g(ID.)g(Otherwise)g(all)h
+(IDs)g(are)f(accepted.)f(-)h(Once)g(the)g(resolv)o(er)f(has)i(been)e
+(cloned,)g(it)i(does)f(not)g(accept)g(an)o(y)396 3885
+y(ID.)g(This)h(means)f(that)g(this)h(resolv)o(er)e(cannot)g(handle)g
+(inner)g(references)g(to)h(e)o(xternal)f(entities.)i(Note)f(that)g(you)
+f(can)396 3993 y(combine)g(this)i(resolv)o(er)e(with)h(another)f
+(resolv)o(er)g(that)h(can)g(handle)f(inner)g(references)g(\(such)h(as)h
+(resolv)o(e_as_\002le\);)396 4101 y(see)g(class)g('combine')d(belo)n(w)
+-5 b(.)19 b(-)i(If)f(you)f(pass)i(the)f Fq(~fixenc)f
+Fv(ar)o(gument,)f(the)i(encoding)e(of)i(the)g(string)g(is)h(set)g(to)g
+(the)396 4209 y(passed)f(v)n(alue,)g(re)o(gardless)e(of)i(an)o(y)g
+(auto-recognition)c(or)k(an)o(y)f(XML)i(declaration.)396
+4430 y Fq(class)44 b(resolve_read_any_string)d(:)576
+4527 y(string_of_id:\(ext_id)g(->)k(\(string)e(*)i(encoding)e
+(option\)\))h(->)755 4625 y(resolver)p Black 3800 5278
+a Fr(79)p Black eop
+%%Page: 80 80
+80 79 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(This)h(resolv)o(er)e
+(calls)h(the)h(function)d Fq(~string_of_id)h Fv(to)h(get)g(the)g
+(string)g(for)g(the)g(passed)g Fq(ext_id)p Fv(.)g(This)g(function)396
+687 y(must)g(either)g(return)f(the)i(string)e(and)h(the)g(encoding,)e
+(or)i(it)h(must)f(f)o(ail)h(with)f(Not_competent.)e(The)h(function)g
+(must)396 795 y(return)g Fq(None)h Fv(as)h(encoding)d(if)j(the)f(def)o
+(ault)g(mechanism)e(to)j(recognize)d(the)i(encoding)f(should)g(be)h
+(used.)g(It)g(must)396 903 y(return)f Fq(Some)44 b(e)21
+b Fv(if)g(it)f(is)i(already)d(kno)n(wn)f(that)j(the)f(encoding)e(of)i
+(the)g(string)g(is)h Fq(e)p Fv(.)396 1124 y Fq(class)44
+b(resolve_as_file)f(:)576 1222 y(?file_prefix:[)f(`Not_recognized)g(|)j
+(`Allowed)f(|)g(`Required)g(])g(->)576 1319 y(?host_prefix:[)e
+(`Not_recognized)g(|)j(`Allowed)f(|)g(`Required)g(])g(->)576
+1416 y(?system_encoding:encoding)c(->)576 1513 y(?url_of_id:\(ext_id)h
+(->)k(Neturl.url\))e(->)576 1610 y(?channel_of_url:)f(\(Neturl.url)h
+(->)h(\(in_channel)f(*)i(encoding)e(option\)\))h(->)576
+1707 y(unit)g(->)755 1804 y(resolver)396 1995 y Fv(Reads)21
+b(from)e(the)h(local)g(\002le)h(system.)f(Ev)o(ery)f(\002le)i(name)f
+(is)h(interpreted)d(as)j(\002le)g(name)f(of)f(the)i(local)f(\002le)h
+(system,)f(and)396 2103 y(the)g(referred)f(\002le)i(is)g(read.)396
+2253 y(The)f(full)g(form)f(of)h(a)h(\002le)g(URL)g(is:)g
+(\002le://host/path,)e(where)h('host')f(speci\002es)i(the)f(host)g
+(system)g(where)g(the)g(\002le)396 2361 y(identi\002ed)g('path')f
+(resides.)h(host)g(=)g("")h(or)f(host)g(=)h("localhost")e(are)h
+(accepted;)f(other)h(v)n(alues)f(will)i(raise)396 2468
+y(Not_competent.)d(The)i(standard)f(for)g(\002le)i(URLs)g(is)g
+(de\002ned)e(in)i(RFC)g(1738.)396 2618 y(Option)f Fq(~file_prefix)p
+Fv(:)e(Speci\002es)j(ho)n(w)f(the)g("\002le:")h(pre\002x)e(of)h(\002le)
+h(names)f(is)h(handled:)p Black 396 2850 a Ft(\225)p
+Black 60 w Fq(`Not_recognized:)p Fv(The)c(pre\002x)j(is)h(not)f
+(recognized.)p Black 396 2958 a Ft(\225)p Black 60 w
+Fq(`Allowed:)g Fv(The)f(pre\002x)h(is)h(allo)n(wed)e(b)n(ut)i(not)f
+(required)e(\(the)i(def)o(ault\).)p Black 396 3066 a
+Ft(\225)p Black 60 w Fq(`Required:)f Fv(The)h(pre\002x)g(is)h
+(required.)396 3257 y(Option)f Fq(~host_prefix:)e Fv(Speci\002es)j(ho)n
+(w)e(the)i("//host")f(phrase)f(of)h(\002le)h(names)f(is)h(handled:)p
+Black 396 3490 a Ft(\225)p Black 60 w Fq(`Not_recognized:)p
+Fv(The)c(pre\002x)j(is)h(not)f(recognized.)p Black 396
+3598 a Ft(\225)p Black 60 w Fq(`Allowed:)g Fv(The)f(pre\002x)h(is)h
+(allo)n(wed)e(b)n(ut)i(not)f(required)e(\(the)i(def)o(ault\).)p
+Black 396 3706 a Ft(\225)p Black 60 w Fq(`Required:)f
+Fv(The)h(pre\002x)g(is)h(required.)396 3896 y(Option)f
+Fq(~system_encoding:)e Fv(Speci\002es)i(the)g(encoding)e(of)i(\002le)h
+(names)f(of)g(the)g(local)g(\002le)h(system.)f(Def)o(ault:)396
+4004 y(UTF-8.)396 4154 y(Options)g Fq(~url_of_id)p Fv(,)f
+Fq(~channel_of_url)p Fv(:)f(Not)i(for)g(the)g(casual)g(user!)396
+4376 y Fq(class)44 b(combine)g(:)576 4473 y(?prefer:resolver)e(->)576
+4570 y(resolver)h(list)h(->)755 4667 y(resolver)p Black
+3800 5278 a Fr(80)p Black eop
+%%Page: 81 81
+81 80 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Combines)g(se)n(v)o
+(eral)f(resolv)o(er)g(objects.)h(If)g(a)h(concrete)e(entity)g(with)i
+(an)f Fq(ext_id)g Fv(is)h(to)f(be)g(opened,)f(the)h(combined)396
+687 y(resolv)o(er)f(tries)i(the)f(contained)f(resolv)o(ers)g(in)h(turn)
+g(until)g(a)g(resolv)o(er)f(accepts)h(opening)f(the)h(entity)g(\(i.e.)g
+(it)g(does)g(not)396 795 y(raise)h(Not_competent)c(on)j(open_in\).)396
+944 y(Clones:)h(If)f(the)g('clone')f(method)g(is)i(in)m(v)n(ok)o(ed)d
+(before)h('open_in',)e(all)k(contained)e(resolv)o(ers)g(are)h(cloned)f
+(separately)396 1052 y(and)h(again)f(combined.)f(If)i(the)g('clone')f
+(method)g(is)i(in)m(v)n(ok)o(ed)e(after)g('open_in')f(\(i.e.)i(while)g
+(the)g(resolv)o(er)f(is)i(open\),)396 1160 y(additionally)e(the)h
+(clone)f(of)h(the)h(acti)n(v)o(e)e(resolv)o(er)g(is)i(\003agged)f(as)g
+(being)g(preferred,)d(i.e.)k(it)f(is)i(tried)e(\002rst.)-2
+1662 y Fx(4.3.)39 b(The)g(DTD)g(c)m(lasses)396 1841 y
+Fr(Sorry)-5 b(,)21 b(not)f(yet)g(written.)h(P)-7 b(erhaps)20
+b(the)g(interface)g(de\002nition)e(of)j(Pxp_dtd)d(e)n(xpr)m(esses)j
+(the)f(same:)396 2063 y Fq(\(****************************************)o
+(******)o(******)o(******)o(******)o(*****)o(*\))396
+2160 y(\(*)3048 b(*\))396 2257 y(\(*)45 b(Pxp_dtd:)2643
+b(*\))396 2354 y(\(*)224 b(Object)44 b(model)g(of)g(document)g(type)g
+(declarations)939 b(*\))396 2452 y(\(*)3048 b(*\))396
+2549 y(\(****************************************)o(******)o(******)o
+(******)o(******)o(*****)o(*\))396 2743 y(\(*)45 b
+(======================================)o(======)o(======)o(======)o
+(======)o(=====)o(===)441 2840 y(*)g(OVERVIEW)441 2937
+y(*)441 3034 y(*)g(class)f(dtd)g(...............)e(represents)i(the)g
+(whole)g(DTD,)g(including)f(element)441 3132 y(*)1210
+b(declarations,)43 b(entity)h(declarations,)f(notation)441
+3229 y(*)1210 b(declarations,)43 b(and)h(processing)g(instructions)441
+3326 y(*)h(class)f(dtd_element)f(.......)g(represents)h(an)g(element)g
+(declaration)f(consisting)441 3423 y(*)1210 b(of)45 b(a)g(content)e
+(model)h(and)h(an)f(attribute)f(list)441 3520 y(*)1210
+b(declaration)441 3617 y(*)45 b(class)f(dtd_notation)f(......)g
+(represents)h(a)g(notation)g(declaration)441 3714 y(*)h(class)f
+(proc_instruction)e(..)i(represents)g(a)g(processing)f(instruction)441
+3811 y(*)i(======================================)o(======)o(======)o
+(======)o(======)o(=====)o(===)441 3909 y(*)441 4006
+y(*\))396 4297 y(class)f(dtd)h(:)486 4394 y(\(*)f(Creation:)531
+4491 y(*)134 b(new)44 b(dtd)531 4589 y(*)g(creates)g(a)h(new,)f(empty)g
+(DTD)g(object)g(without)g(any)g(declaration,)f(without)g(a)i(root)531
+4686 y(*)f(element,)g(without)g(an)g(ID.)531 4783 y(*\))p
+Black 3800 5278 a Fr(81)p Black eop
+%%Page: 82 82
+82 81 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 486 579 a Fq
+(Pxp_types.collect_warnings)40 b(-)p Fo(>)486 676 y Fq
+(Pxp_types.rep_encoding)h(-)p Fo(>)486 773 y Fq(object)576
+870 y(method)i(root)i(:)f(string)g(option)665 967 y(\(*)h(get)f(the)g
+(name)h(of)f(the)g(root)h(element)e(if)i(present)e(*\))576
+1162 y(method)g(set_root)h(:)h(string)e(-)p Fo(>)i Fq(unit)665
+1259 y(\(*)g(set)f(the)g(name)h(of)f(the)g(root)h(element.)e(This)h
+(method)g(can)g(be)h(invoked)710 1356 y(*)g(only)f(once)710
+1453 y(*\))576 1647 y(method)f(id)i(:)g(Pxp_types.dtd_id)d(option)665
+1745 y(\(*)j(get)f(the)g(identifier)g(for)g(this)g(DTD)g(*\))576
+1939 y(method)f(set_id)h(:)h(Pxp_types.dtd_id)d(-)p Fo(>)i
+Fq(unit)665 2036 y(\(*)h(set)f(the)g(identifier.)f(This)i(method)e(can)
+i(be)f(invoked)g(only)g(once)g(*\))576 2230 y(method)f(encoding)h(:)h
+(Pxp_types.rep_encoding)665 2327 y(\(*)g(returns)e(the)i(encoding)e
+(used)h(for)h(character)e(representation)g(*\))576 2619
+y(method)g(allow_arbitrary)g(:)h(unit)665 2716 y(\(*)h(After)f(this)g
+(method)g(has)g(been)g(invoked,)g(the)g(ob-)396 2813
+y(ject)g(changes)g(its)g(behaviour:)710 2910 y(*)h(-)f(elements)g(and)g
+(notations)g(that)g(have)g(not)g(been)g(added)g(may)h(be)f(used)g(in)h
+(an)710 3007 y(*)134 b(arbitrary)44 b(way;)g(the)g(methods)g("element")
+f(and)i("notation")e(indicate)g(this)710 3104 y(*)134
+b(by)45 b(raising)f(Undeclared)f(instead)g(of)i(Validation_error.)710
+3202 y(*\))576 3396 y(method)e(disallow_arbitrary)f(:)j(unit)576
+3590 y(method)e(arbitrary_allowed)f(:)j(bool)665 3687
+y(\(*)g(Returns)e(whether)h(arbitrary)f(contents)h(are)g(allowed)g(or)g
+(not.)h(*\))576 3882 y(method)e(standalone_declaration)f(:)i(bool)665
+3979 y(\(*)h(Whether)e(there)h(is)h(a)g('standalone')d(declaration)h
+(or)i(not.)f(Strictly)710 4076 y(*)h(speaking,)e(this)h(declaration)f
+(is)i(not)f(part)g(of)h(the)f(DTD,)g(but)h(it)f(is)710
+4173 y(*)h(included)e(here)h(because)g(of)h(practical)e(reasons.)710
+4270 y(*)i(If)f(not)h(set,)f(this)g(property)f(defaults)h(to)g
+('false'.)710 4367 y(*\))576 4561 y(method)f
+(set_standalone_declaration)e(:)k(bool)f(-)p Fo(>)g Fq(unit)665
+4659 y(\(*)h(Sets)f(the)g('standalone')f(declaration.)g(*\))p
+Black 3800 5278 a Fr(82)p Black eop
+%%Page: 83 83
+83 82 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(add_element)g(:)i(dtd_element)e(-)p Fo(>)h Fq(unit)665
+676 y(\(*)h(add)f(the)g(given)g(element)g(declaration)f(to)i(this)f
+(DTD.)g(Raises)g(Not_found)710 773 y(*)h(if)f(there)g(is)h(already)e
+(an)i(element)f(declaration)f(with)h(the)g(same)g(name.)710
+870 y(*\))576 1065 y(method)f(add_gen_entity)g(:)i(Pxp_entity.entity)d
+(-)p Fo(>)i Fq(bool)g(-)p Fo(>)g Fq(unit)665 1162 y(\(*)h
+(add_gen_entity)d(e)j(extdecl:)710 1259 y(*)g(add)f(the)g(entity)g('e')
+h(as)f(general)g(entity)g(to)g(this)g(DTD)h(\(general)e(entities)710
+1356 y(*)i(are)f(those)g(represented)f(by)i(&name;\).)e(If)i(there)f
+(is)g(already)g(a)g(declaration)710 1453 y(*)h(with)f(the)g(same)g
+(name,)g(the)h(second)f(definition)f(is)h(ignored;)g(as)g(excep-)396
+1550 y(tion)g(from)710 1647 y(*)h(this)f(rule,)g(entities)f(with)i
+(names)f("lt",)g("gt",)g("amp",)f("quot",)h(and)g("apos")710
+1745 y(*)h(may)f(only)g(be)h(redeclared)e(with)h(a)h(definition)e(that)
+h(is)h(equivalent)e(to)h(the)710 1842 y(*)h(standard)e(definition;)g
+(otherwise)h(a)g(Validation_error)e(is)j(raised.)710
+1939 y(*)710 2036 y(*)g('extdecl':)e('true')h(indicates)f(that)h(the)h
+(entity)e(declaration)g(occurs)h(in)710 2133 y(*)h(an)f(external)g
+(entity.)f(\(Used)h(for)h(the)f(standalone)f(check.\))710
+2230 y(*\))576 2424 y(method)g(add_par_entity)g(:)i(Pxp_entity.entity)d
+(-)p Fo(>)i Fq(unit)665 2522 y(\(*)h(add)f(the)g(given)g(entity)g(as)h
+(parameter)e(entity)h(to)g(this)h(DTD)f(\(parameter)710
+2619 y(*)h(entities)e(are)i(those)f(represented)f(by)h(\045name;\).)g
+(If)g(there)g(is)h(already)e(a)710 2716 y(*)i(declaration)e(with)h(the)
+g(same)g(name,)g(the)h(second)f(definition)f(is)h(ignored.)710
+2813 y(*\))576 3007 y(method)f(add_notation)g(:)i(dtd_notation)e(-)p
+Fo(>)h Fq(unit)665 3104 y(\(*)h(add)f(the)g(given)g(notation)g(to)g
+(this)h(DTD.)f(If)g(there)g(is)h(al-)396 3202 y(ready)f(a)h
+(declaration)710 3299 y(*)g(with)f(the)g(same)g(name,)g(a)h
+(Validation_error)d(is)j(raised.)710 3396 y(*\))576 3590
+y(method)e(add_pinstr)h(:)g(proc_instruction)e(-)p Fo(>)j
+Fq(unit)665 3687 y(\(*)g(add)f(the)g(given)g(processing)g(instruction)f
+(to)h(this)g(DTD.)g(*\))576 3882 y(method)f(element)h(:)h(string)f(-)p
+Fo(>)g Fq(dtd_element)665 3979 y(\(*)h(looks)f(up)g(the)h(element)e
+(declaration)g(with)h(the)h(given)f(name.)g(Raises)710
+4076 y(*)h(Validation_error)d(if)i(the)h(element)e(can-)396
+4173 y(not)i(be)f(found.)g(\(If)g("allow_arbitrary")710
+4270 y(*)h(has)f(been)g(invoked)g(before,)g(Unrestricted)e(is)j(raised)
+f(instead.\))710 4367 y(*\))576 4561 y(method)f(element_names)g(:)i
+(string)f(list)665 4659 y(\(*)h(returns)e(the)i(list)f(of)g(the)h
+(names)f(of)g(all)h(element)e(declarations.)g(*\))576
+4853 y(method)g(gen_entity)h(:)g(string)g(-)p Fo(>)g
+Fq(\(Pxp_entity.entity)e(*)j(bool\))p Black 3800 5278
+a Fr(83)p Black eop
+%%Page: 84 84
+84 83 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45
+b(let)f(e,)h(extdecl)e(=)i(obj)f(#)h(gen_entity)e(n:)710
+676 y(*)i(looks)f(up)g(the)h(general)e(entity)h('e')g(with)h(the)f
+(name)g('n'.)g(Raises)710 773 y(*)h(WF_error)e(if)i(the)f(entity)g
+(cannot)g(be)g(found.)710 870 y(*)h('extdecl':)e(indicates)g(whether)h
+(the)g(entity)g(declaration)f(occured)h(in)g(an)710 967
+y(*)h(external)e(entity.)710 1065 y(*\))576 1259 y(method)g
+(gen_entity_names)g(:)h(string)g(list)665 1356 y(\(*)h(returns)e(the)i
+(list)f(of)g(all)h(general)e(entity)h(names)g(*\))576
+1550 y(method)f(par_entity)h(:)g(string)g(-)p Fo(>)g
+Fq(Pxp_entity.entity)665 1647 y(\(*)h(looks)f(up)g(the)h(parameter)e
+(entity)h(with)g(the)g(given)g(name.)g(Raises)710 1745
+y(*)h(WF_error)e(if)i(the)f(entity)g(cannot)g(be)g(found.)710
+1842 y(*\))576 2036 y(method)f(par_entity_names)g(:)h(string)g(list)665
+2133 y(\(*)h(returns)e(the)i(list)f(of)g(all)h(parameter)e(entity)h
+(names)g(*\))576 2327 y(method)f(notation)h(:)h(string)e(-)p
+Fo(>)i Fq(dtd_notation)665 2424 y(\(*)g(looks)f(up)g(the)h(notation)e
+(declaration)g(with)h(the)h(given)f(name.)g(Raises)710
+2522 y(*)h(Validation_error)d(if)i(the)h(notation)e(can-)396
+2619 y(not)i(be)f(found.)g(\(If)g("allow_arbitrary")710
+2716 y(*)h(has)f(been)g(invoked)g(before,)g(Unrestricted)e(is)j(raised)
+f(instead.\))710 2813 y(*\))576 3007 y(method)f(notation_names)g(:)i
+(string)e(list)665 3104 y(\(*)i(Returns)e(the)i(list)f(of)g(the)h
+(names)f(of)g(all)h(added)f(notations)f(*\))576 3299
+y(method)g(pinstr)h(:)h(string)f(-)p Fo(>)g Fq(proc_instruction)e(list)
+665 3396 y(\(*)j(looks)f(up)g(all)h(processing)e(instructions)g(with)h
+(the)g(given)g(target.)710 3493 y(*)h(The)f("target")g(is)g(the)g
+(identifier)g(following)f(")p Fo(<)p Fq(?".)710 3590
+y(*)i(Note:)f(It)g(is)h(not)f(possible)g(to)g(find)g(out)h(the)f(exact)
+g(position)f(of)i(the)710 3687 y(*)g(processing)e(instruction.)710
+3784 y(*\))576 3979 y(method)g(pinstr_names)g(:)i(string)f(list)665
+4076 y(\(*)h(Returns)e(the)i(list)f(of)g(the)h(names)f(\(targets\))f
+(of)i(all)f(added)g(pinstrs)f(*\))576 4270 y(method)g(validate)h(:)h
+(unit)665 4367 y(\(*)g(ensures)e(that)i(the)f(DTD)g(is)h(valid.)f(This)
+g(method)g(is)g(optimized)f(such)h(that)710 4464 y(*)h(actual)f
+(validation)f(is)h(only)g(performed)g(if)g(DTD)h(has)f(changed.)710
+4561 y(*)h(If)f(the)h(DTD)f(is)g(invalid,)g(mostly)g(a)g
+(Validation_error)f(is)h(raised,)710 4659 y(*)h(but)f(other)g
+(exceptions)f(are)i(possible,)e(too.)710 4756 y(*\))p
+Black 3800 5278 a Fr(84)p Black eop
+%%Page: 85 85
+85 84 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(only_deterministic_models)e(:)k(unit)665 676 y(\(*)g(Succeeds)e(if)i
+(all)f(regexp)g(content)g(models)f(are)i(deterministic.)710
+773 y(*)g(Otherwise)e(Validation_error.)710 870 y(*\))576
+1065 y(method)g(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(bool)f(-)396
+1162 y Fo(>)h Fq(unit)665 1259 y(\(*)g(write_compact_as_latin1)c(os)j
+(enc)h(doctype:)710 1356 y(*)g(Writes)f(the)g(DTD)g(as)h('enc'-encoded)
+d(string)i(to)h('os'.)f(If)g('doctype',)f(a)710 1453
+y(*)i(DTD)f(like)g Fo(<)p Fq(!DOCTYPE)f(root)i([)f(...)h(])p
+Fo(>)f Fq(is)g(written.)g(If)g('not)h(doctype',)710 1550
+y(*)g(only)f(the)g(declarations)f(are)h(written)g(\(the)g(material)g
+(within)g(the)710 1647 y(*)h(square)f(brackets\).)710
+1745 y(*\))576 1939 y(method)f(write_compact_as_latin1)e(:)k
+(Pxp_types.output_stream)c(-)p Fo(>)j Fq(bool)h(-)p Fo(>)f
+Fq(unit)665 2036 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)h(to)h
+(keep)f(compatibility)f(with)710 2133 y(*)i(older)f(versions)f(of)i
+(the)f(parser)710 2230 y(*\))576 2522 y
+(\(*---------------------------*\))576 2619 y(method)f(invalidate)h(:)g
+(unit)665 2716 y(\(*)h(INTERNAL)e(METHOD)h(*\))576 2813
+y(method)f(warner)h(:)h(Pxp_types.collect_warnings)665
+2910 y(\(*)g(INTERNAL)e(METHOD)h(*\))486 3007 y(end)396
+3396 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))396 3590 y(and)45 b(dtd_element)e(:)h(dtd)h(-)p
+Fo(>)f Fq(string)g(-)p Fo(>)486 3687 y Fq(\(*)g(Creation:)531
+3784 y(*)134 b(new)44 b(dtd_element)f(init_dtd)h(init_name:)531
+3882 y(*)g(creates)g(a)h(new)f(dtd_element)f(object)h(for)g(init_dtd)g
+(with)g(init_name.)531 3979 y(*)g(The)h(strings)e(are)i(represented)e
+(in)h(the)h(same)f(encoding)f(as)i(init_dtd.)531 4076
+y(*\))486 4173 y(object)576 4367 y(method)e(name)i(:)f(string)665
+4464 y(\(*)h(returns)e(the)i(name)f(of)g(the)h(declared)e(element)h
+(*\))576 4659 y(method)f(externally_declared)f(:)j(bool)665
+4756 y(\(*)g(returns)e(whether)h(the)g(element)g(declaration)f(occurs)h
+(in)g(an)h(external)710 4853 y(*)g(entity.)p Black 3800
+5278 a Fr(85)p Black eop
+%%Page: 86 86
+86 85 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 710 579 a Fq(*\))576
+773 y(method)43 b(content_model)g(:)i(Pxp_types.content_model_type)665
+870 y(\(*)g(get)f(the)g(content)g(model)g(of)h(this)f(element)f
+(declaration,)g(or)i(Unspecified)e(*\))576 1065 y(method)g(content_dfa)
+g(:)i(Pxp_dfa.dfa_definition)c(option)665 1162 y(\(*)k(return)f(the)g
+(DFA)g(of)h(the)f(content)g(model)g(if)g(there)g(is)h(a)f(DFA,)h(or)f
+(None.)710 1259 y(*)h(A)f(DFA)h(exists)f(only)g(for)g(regexp)g(style)g
+(content)g(models)f(which)h(are)710 1356 y(*)h(deterministic.)710
+1453 y(*\))576 1647 y(method)e(set_cm_and_extdecl)f(:)j
+(Pxp_types.content_model_type)40 b(-)p Fo(>)k Fq(bool)h(-)p
+Fo(>)f Fq(unit)665 1745 y(\(*)h(set_cm_and_extdecl)d(cm)i(extdecl:)710
+1842 y(*)h(set)f(the)g(content)g(model)g(to)h('cm'.)f(Once)g(the)g
+(content)g(model)g(is)g(not)710 1939 y(*)h(Unspecified,)e(it)h(cannot)g
+(be)g(set)h(to)f(a)h(different)e(value)h(again.)710 2036
+y(*)h(Furthermore,)e(it)h(is)h(set)f(whether)g(the)g(element)g(occurs)f
+(in)i(an)f(external)710 2133 y(*)h(entity)f(\('extdecl'\).)710
+2230 y(*\))576 2424 y(method)f(encoding)h(:)h(Pxp_types.rep_encoding)
+665 2522 y(\(*)g(Return)f(the)g(encoding)f(of)i(the)f(strings)g(*\))576
+2716 y(method)f(allow_arbitrary)g(:)h(unit)665 2813 y(\(*)h(After)f
+(this)g(method)g(has)g(been)g(invoked,)g(the)g(ob-)396
+2910 y(ject)g(changes)g(its)g(behaviour:)710 3007 y(*)h(-)f(attributes)
+g(that)g(have)g(not)g(been)g(added)g(may)h(be)f(used)g(in)h(an)710
+3104 y(*)134 b(arbitrary)44 b(way;)g(the)g(method)g("attribute")f
+(indicates)g(this)710 3202 y(*)134 b(by)45 b(raising)f(Undeclared)f
+(instead)g(of)i(Validation_error.)710 3299 y(*\))576
+3493 y(method)e(disallow_arbitrary)f(:)j(unit)576 3687
+y(method)e(arbitrary_allowed)f(:)j(bool)665 3784 y(\(*)g(Returns)e
+(whether)h(arbitrary)f(attributes)h(are)g(allowed)g(or)g(not.)g(*\))576
+3979 y(method)f(attribute)h(:)g(string)g(-)p Fo(>)1517
+4076 y Fq(Pxp_types.att_type)e(*)j(Pxp_types.att_default)665
+4173 y(\(*)g(get)f(the)g(type)h(and)f(default)g(value)g(of)g(a)h
+(declared)e(attribute,)g(or)i(raise)710 4270 y(*)g(Validation_error)d
+(if)i(the)h(attribute)e(does)h(not)h(exist.)710 4367
+y(*)g(If)f('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g
+(raised)g(instead)710 4464 y(*)h(of)f(Validation_error.)710
+4561 y(*\))576 4756 y(method)f
+(attribute_violates_standalone_declaration)38 b(:)1069
+4853 y(string)44 b(-)p Fo(>)g Fq(string)g(option)g(-)p
+Fo(>)g Fq(bool)p Black 3798 5278 a Fr(86)p Black eop
+%%Page: 87 87
+87 86 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45
+b(attribute_violates_standalone_declarat)o(ion)39 b(name)44
+b(v:)710 676 y(*)h(Checks)f(whether)f(the)i(attribute)e('name')h
+(violates)f(the)i("standalone")710 773 y(*)g(declaration)e(if)h(it)h
+(has)f(value)g('v'.)710 870 y(*)h(The)f(method)g(returns)g(true)g(if:)
+710 967 y(*)h(-)f(The)h(attribute)e(declaration)g(occurs)h(in)g(an)h
+(external)e(entity,)710 1065 y(*)i(and)f(if)h(one)f(of)g(the)h(two)f
+(conditions)f(holds:)710 1162 y(*)i(-)f(v)h(=)g(None,)f(and)g(there)g
+(is)h(a)f(default)g(for)g(the)h(attribute)e(value)710
+1259 y(*)i(-)f(v)h(=)g(Some)f(s,)g(and)h(the)f(type)g(of)h(the)f
+(attribute)f(is)i(not)f(CDATA,)710 1356 y(*)134 b(and)45
+b(s)f(changes)g(if)h(normalized)e(according)g(to)i(the)f(rules)g(of)g
+(the)710 1453 y(*)134 b(attribute)44 b(type.)710 1550
+y(*)710 1647 y(*)h(The)f(method)g(raises)g(Validation_error)e(if)i(the)
+h(attribute)e(does)h(not)g(exist.)710 1745 y(*)h(If)f
+('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g(raised)g
+(instead)710 1842 y(*)h(of)f(Validation_error.)710 1939
+y(*\))576 2133 y(method)f(attribute_names)g(:)h(string)g(list)665
+2230 y(\(*)h(get)f(the)g(list)h(of)f(all)g(declared)g(attributes)f(*\))
+576 2424 y(method)g(names_of_required_attributes)e(:)j(string)g(list)
+665 2522 y(\(*)h(get)f(the)g(list)h(of)f(all)g(attributes)g(that)g(are)
+g(specified)f(as)i(required)710 2619 y(*)g(attributes)710
+2716 y(*\))576 2910 y(method)e(id_attribute_name)f(:)j(string)f(option)
+665 3007 y(\(*)h(Returns)e(the)i(name)f(of)g(the)h(attribute)e(with)h
+(type)g(ID,)h(or)f(None.)g(*\))576 3202 y(method)f
+(idref_attribute_names)f(:)i(string)g(list)665 3299 y(\(*)h(Returns)e
+(the)i(names)f(of)g(the)h(attributes)e(with)h(type)g(IDREF)g(or)h
+(IDREFS.)e(*\))576 3493 y(method)g(add_attribute)g(:)i(string)f(-)p
+Fo(>)1607 3590 y Fq(Pxp_types.att_type)e(-)p Fo(>)531
+3687 y Fq(Pxp_types.att_default)f(-)p Fo(>)531 3784 y
+Fq(bool)j(-)p Fo(>)620 3882 y Fq(unit)665 3979 y(\(*)h(add_attribute)d
+(name)j(type)f(default)f(extdecl:)710 4076 y(*)i(add)f(an)h(attribute)e
+(declaration)g(for)h(an)h(attribute)e(with)h(the)h(given)e(name,)710
+4173 y(*)i(type,)f(and)g(default)g(value.)g(If)g(there)g(is)h(more)f
+(than)g(one)g(declaration)f(for)710 4270 y(*)i(an)f(attribute)g(name,)g
+(the)g(first)g(declara-)396 4367 y(tion)g(counts;)g(the)g(other)g
+(declarations)710 4464 y(*)h(are)f(ignored.)710 4561
+y(*)h('extdecl':)e(if)h(true,)g(the)h(attribute)e(declaration)g(occurs)
+h(in)g(an)h(external)710 4659 y(*)g(entity.)e(This)i(property)e(is)i
+(used)f(to)g(check)g(the)h("standalone")d(attribute.)710
+4756 y(*\))p Black 3797 5278 a Fr(87)p Black eop
+%%Page: 88 88
+88 87 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(validate)h(:)h(unit)665 676 y(\(*)g(checks)f(whether)f(this)h
+(element)g(declaration)f(\(i.e.)h(the)g(content)g(model)g(and)710
+773 y(*)h(all)f(attribute)f(declarations\))g(is)i(valid)f(for)g(the)g
+(associated)f(DTD.)710 870 y(*)i(Raises)f(mostly)f(Validation_error)g
+(if)h(the)g(validation)g(fails.)710 967 y(*\))576 1162
+y(method)f(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665
+1259 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710
+1356 y(*)h(Writes)f(the)g Fo(<)p Fq(!ELEMENT)f(...)h
+Fo(>)h Fq(declaration)e(to)h('os')h(as)f('enc'-)396 1453
+y(encoded)g(string.)710 1550 y(*\))576 1745 y(method)f
+(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 1842 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 1939 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 2036 y(*\))486 2133 y(end)396
+2327 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))396 2522 y(and)45 b(dtd_notation)d(:)j(string)f(-)p
+Fo(>)g Fq(Pxp_types.ext_id)e(-)p Fo(>)j Fq(Pxp_types.rep_encoding)c(-)p
+Fo(>)486 2619 y Fq(\(*)j(Creation:)531 2716 y(*)179 b(new)44
+b(dtd_notation)f(a_name)h(an_external_ID)e(init_encoding)531
+2813 y(*)i(creates)g(a)h(new)f(dtd_notation)f(object)h(with)g(the)g
+(given)g(name)g(and)h(the)f(given)531 2910 y(*)g(external)g(ID.)531
+3007 y(*\))486 3104 y(object)576 3202 y(method)f(name)i(:)f(string)576
+3299 y(method)f(ext_id)h(:)h(Pxp_types.ext_id)576 3396
+y(method)e(encoding)h(:)h(Pxp_types.rep_encoding)576
+3590 y(method)e(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665
+3687 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710
+3784 y(*)h(Writes)f(the)g Fo(<)p Fq(!NOTATION)f(...)h
+Fo(>)h Fq(declaration)e(to)h('os')g(as)h('enc'-encoded)710
+3882 y(*)g(string.)710 3979 y(*\))576 4173 y(method)e
+(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 4270 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 4367 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 4464 y(*\))486 4659 y(end)396
+4853 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))p Black 3800 5278 a Fr(88)p Black eop
+%%Page: 89 89
+89 88 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 676 a Fq(and)45
+b(proc_instruction)d(:)i(string)g(-)p Fo(>)h Fq(string)e(-)p
+Fo(>)i Fq(Pxp_types.rep_encoding)c(-)p Fo(>)486 773 y
+Fq(\(*)j(Creation:)531 870 y(*)134 b(new)44 b(proc_instruction)f
+(a_target)g(a_value)531 967 y(*)h(creates)g(a)h(new)f(proc_instruction)
+e(object)i(with)g(the)h(given)f(target)f(string)h(and)531
+1065 y(*)g(the)h(given)f(value)g(string.)531 1162 y(*)g(Note:)g(A)h
+(processing)e(instruction)g(is)i(written)e(as)i Fo(<)p
+Fq(?target)e(value?)p Fo(>)p Fq(.)531 1259 y(*\))486
+1356 y(object)576 1453 y(method)g(target)h(:)h(string)576
+1550 y(method)e(value)h(:)h(string)576 1647 y(method)e(encoding)h(:)h
+(Pxp_types.rep_encoding)576 1842 y(method)e(write)h(:)h
+(Pxp_types.output_stream)c(-)p Fo(>)j Fq(Pxp_types.encoding)e(-)p
+Fo(>)j Fq(unit)665 1939 y(\(*)g(write)f(os)g(enc:)710
+2036 y(*)h(Writes)f(the)g Fo(<)p Fq(?...?)p Fo(>)f Fq(PI)i(to)f('os')h
+(as)f('enc'-encoded)f(string.)710 2133 y(*\))576 2327
+y(method)g(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 2424 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 2522 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 2619 y(*\))576 2813 y(method)f(parse_pxp_option)
+g(:)h(\(string)g(*)h(string)e(*)i(\(string)f(*)g(string\))g(list\))665
+2910 y(\(*)h(Parses)f(a)g(PI)h(containing)e(a)i(PXP)f(option.)g(Such)g
+(PIs)g(are)g(formed)g(like:)710 3007 y(*)134 b Fo(<)p
+Fq(?target)44 b(option-name)f(option-att="value")f(option-att="value")f
+(...)k(?)p Fo(>)710 3104 y Fq(*)g(The)f(method)g(returns)g(a)g(triple)
+710 3202 y(*)134 b(\(target,)44 b(option-name,)f([option-att,)g(value;)
+g(...]\))710 3299 y(*)i(or)f(raises)g(Error.)710 3396
+y(*\))486 3590 y(end)396 3784 y(;;)-2 4286 y Fx(4.4.)39
+b(In)-6 b(v)l(oking)38 b(the)h(par)n(ser)396 4466 y Fv(Here)20
+b(a)h(description)e(of)h(Pxp_yacc.)-2 4794 y Fp(4.4.1.)35
+b(Defaults)p Black 3800 5278 a Fr(89)p Black eop
+%%Page: 90 90
+90 89 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(follo)n(wing)f
+(def)o(aults)g(are)i(a)n(v)n(ailable:)396 759 y Fq(val)45
+b(default_config)d(:)j(config)396 856 y(val)g(default_extension)d(:)i
+(\('a)h(node)f(extension\))f(as)h('a)396 953 y(val)h(default_spec)d(:)j
+(\('a)f(node)h(extension)e(as)h('a\))h(spec)-2 1406 y
+Fp(4.4.2.)35 b(P)l(ar)n(sing)f(functions)396 1574 y Fv(In)20
+b(the)g(follo)n(wing,)f(the)h(term)g("closed)g(document")e(refers)h(to)
+i(an)f(XML)g(structure)f(lik)o(e)396 1754 y Fo(<)p Fq(!DOCTYPE)43
+b(...)i([)f Fn(declarations)f Fq(])i Fo(>)396 1851 y(<)p
+Fn(root)p Fo(>)396 1948 y Fq(...)396 2045 y Fo(<)p Fq(/)p
+Fn(root)p Fo(>)396 2236 y Fv(The)20 b(term)g("fragment")e(refers)i(to)g
+(an)g(XML)h(structure)e(lik)o(e)396 2416 y Fo(<)p Fn(root)p
+Fo(>)396 2513 y Fq(...)396 2611 y Fo(<)p Fq(/)p Fn(root)p
+Fo(>)396 2802 y Fv(i.e.)h(only)g(to)g(one)g(isolated)g(element)f
+(instance.)396 3023 y Fq(val)45 b(parse_dtd_entity)d(:)i(config)g(->)h
+(source)f(->)g(dtd)396 3214 y Fv(P)o(arses)21 b(the)f(declarations)f
+(which)h(are)g(contained)e(in)j(the)f(entity)-5 b(,)19
+b(and)h(returns)f(them)h(as)h Fq(dtd)f Fv(object.)396
+3436 y Fq(val)45 b(extract_dtd_from_document_entity)39
+b(:)45 b(config)f(->)g(source)g(->)g(dtd)396 3627 y Fv(Extracts)20
+b(the)g(DTD)h(from)e(a)h(closed)g(document.)e(Both)i(the)h(internal)e
+(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted)d(and)396
+3735 y(combined)g(to)i(one)f Fq(dtd)h Fv(object.)f(This)h(function)e
+(does)h(not)h(parse)f(the)h(whole)f(document,)f(b)n(ut)i(only)e(the)i
+(parts)g(that)g(are)396 3843 y(necessary)g(to)g(e)o(xtract)f(the)i
+(DTD.)396 4064 y Fq(val)45 b(parse_document_entity)c(:)576
+4161 y(?transform_dtd:\(dtd)g(->)k(dtd\))f(->)576 4259
+y(?id_index:\('ext)e(index\))i(->)576 4356 y(config)f(->)576
+4453 y(source)g(->)576 4550 y('ext)h(spec)g(->)755 4647
+y('ext)g(document)p Black 3800 5278 a Fr(90)p Black eop
+%%Page: 91 91
+91 90 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(P)o(arses)h(a)g(closed)
+e(document)g(and)g(v)n(alidates)h(it)h(against)e(the)i(DTD)f(that)g(is)
+h(contained)e(in)h(the)h(document)d(\(internal)396 687
+y(and)i(e)o(xternal)f(subsets\).)h(The)g(option)f Fq(~transform_dtd)f
+Fv(can)i(be)g(used)g(to)g(transform)f(the)h(DTD)h(in)f(the)g(document,)
+396 795 y(and)g(to)g(use)h(the)f(transformed)e(DTD)i(for)g(v)n
+(alidation.)e(If)i Fq(~id_index)g Fv(is)h(speci\002ed,)e(an)h(inde)o(x)
+f(of)h(all)h(ID)f(attrib)n(utes)h(is)396 903 y(created.)396
+1124 y Fq(val)45 b(parse_wfdocument_entity)c(:)576 1222
+y(config)i(->)576 1319 y(source)g(->)576 1416 y('ext)h(spec)g(->)755
+1513 y('ext)g(document)396 1704 y Fv(P)o(arses)21 b(a)g(closed)e
+(document,)f(b)n(ut)j(checks)e(it)i(only)e(on)h(well-formedness.)396
+1926 y Fq(val)45 b(parse_content_entity)86 b(:)576 2023
+y(?id_index:\('ext)42 b(index\))i(->)576 2120 y(config)f(->)576
+2217 y(source)g(->)576 2314 y(dtd)h(->)576 2411 y('ext)g(spec)g(->)755
+2508 y('ext)g(node)396 2699 y Fv(P)o(arses)21 b(a)g(fragment,)d(and)h
+(v)n(alidates)h(the)g(element.)396 2921 y Fq(val)45 b
+(parse_wfcontent_entity)c(:)576 3018 y(config)i(->)576
+3115 y(source)g(->)576 3212 y('ext)h(spec)g(->)755 3310
+y('ext)g(node)396 3500 y Fv(P)o(arses)21 b(a)g(fragment,)d(b)n(ut)i
+(checks)g(it)g(only)g(on)g(well-formedness.)-2 3870 y
+Fp(4.4.3.)35 b(Con\002guration)f(options)396 4110 y Fq(type)44
+b(config)g(=)576 4207 y({)g(warner)g(:)h(collect_warnings;)665
+4304 y(errors_with_line_numbers)c(:)k(bool;)665 4401
+y(enable_pinstr_nodes)d(:)j(bool;)665 4499 y(enable_super_root_node)c
+(:)k(bool;)665 4596 y(enable_comment_nodes)d(:)i(bool;)665
+4693 y(encoding)g(:)g(rep_encoding;)665 4790 y
+(recognize_standalone_declaration)c(:)k(bool;)p Black
+3800 5278 a Fr(91)p Black eop
+%%Page: 92 92
+92 91 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq
+(store_element_positions)41 b(:)k(bool;)665 676 y(idref_pass)e(:)i
+(bool;)665 773 y(validate_by_dfa)e(:)h(bool;)665 870
+y(accept_only_deterministic_models)c(:)k(bool;)665 967
+y(...)576 1065 y(})p Black 396 1422 a Ft(\225)p Black
+60 w Fq(warner:)p Fv(The)19 b(parser)h(prints)f(w)o(arnings)h(by)f(in)m
+(v)n(oking)f(the)j(method)d Fq(warn)j Fv(for)e(this)i(w)o(arner)e
+(object.)h(\(Def)o(ault:)f(all)479 1530 y(w)o(arnings)h(are)g
+(dropped\))p Black 396 1637 a Ft(\225)p Black 60 w Fq
+(errors_with_line_numbers:)p Fv(If)c(true,)k(errors)f(contain)g(line)i
+(numbers;)d(if)j(f)o(alse,)f(errors)g(contain)f(only)g(byte)479
+1745 y(positions.)h(The)g(latter)g(mode)f(is)i(f)o(aster)-5
+b(.)21 b(\(Def)o(ault:)e(true\))p Black 396 1853 a Ft(\225)p
+Black 60 w Fq(enable_pinstr_nodes:)p Fv(If)e(true,)j(the)g(parser)f
+(creates)i(e)o(xtra)e(nodes)g(for)h(processing)f(instructions.)g(If)h
+(f)o(alse,)479 1961 y(processing)f(instructions)g(are)h(simply)g(added)
+f(to)i(the)f(element)f(or)h(document)f(surrounding)e(the)j
+(instructions.)479 2069 y(\(Def)o(ault:)g(f)o(alse\))p
+Black 396 2177 a Ft(\225)p Black 60 w Fq(enable_super_root_node:)p
+Fv(If)c(true,)k(the)g(parser)g(creates)g(an)g(e)o(xtra)g(node)f(which)g
+(is)j(the)e(parent)f(of)h(the)g(root)479 2285 y(of)g(the)g(document)f
+(tree.)h(This)g(node)f(is)i(called)f(super)g(root;)f(it)i(is)g(an)g
+(element)e(with)i(type)e Fq(T_super_root)p Fv(.)g(-)h(If)479
+2393 y(there)g(are)g(processing)f(instructions)g(outside)h(the)g(root)f
+(element)h(and)g(outside)f(the)i(DTD,)f(the)o(y)f(are)h(added)f(to)i
+(the)479 2501 y(super)f(root)f(instead)h(of)g(the)g(document.)e(-)j(If)
+f(f)o(alse,)g(the)g(super)g(root)g(node)f(is)i(not)f(created.)f(\(Def)o
+(ault:)h(f)o(alse\))p Black 396 2609 a Ft(\225)p Black
+60 w Fq(enable_comment_nodes:)p Fv(If)d(true,)i(the)i(parser)e(creates)
+h(nodes)g(for)f(comments)g(with)i(type)f Fq(T_comment)p
+Fv(;)f(if)479 2717 y(f)o(alse,)i(such)f(nodes)f(are)h(not)g(created.)f
+(\(Def)o(ault:)h(f)o(alse\))p Black 396 2825 a Ft(\225)p
+Black 60 w Fq(encoding:)p Fv(Speci\002es)f(the)i(internal)e(encoding)f
+(of)i(the)g(parser)-5 b(.)20 b(Most)g(strings)h(are)f(then)f
+(represented)g(according)479 2933 y(to)i(this)f(encoding;)f(ho)n(we)n
+(v)o(er)f(there)h(are)i(some)f(e)o(xceptions)e(\(especially)i
+Fq(ext_id)f Fv(v)n(alues)h(which)g(are)g(al)o(w)o(ays)479
+3041 y(UTF-8)g(encoded\).)e(\(Def)o(ault:)h(`Enc_iso88591\))p
+Black 396 3148 a Ft(\225)p Black 60 w Fq
+(recognize_standalone_declaration:)c Fv(If)21 b(true)e(and)h(if)h(the)f
+(parser)f(is)i(v)n(alidating,)e(the)479 3256 y Fq(standalone="yes")f
+Fv(declaration)h(forces)h(that)g(it)h(is)g(check)o(ed)e(whether)g(the)h
+(document)e(is)j(a)g(standalone)479 3364 y(document.)d(-)j(If)f(f)o
+(alse,)g(or)g(if)g(the)h(parser)e(is)i(in)g(well-formedness)d(mode,)h
+(such)h(declarations)f(are)h(ignored.)479 3472 y(\(Def)o(ault:)g
+(true\))p Black 396 3580 a Ft(\225)p Black 60 w Fq
+(store_element_positions:)d Fv(If)j(true,)g(for)f(e)n(v)o(ery)g
+(non-data)f(node)h(the)i(source)e(position)g(is)j(stored.)d(If)h(f)o
+(alse,)479 3688 y(the)g(position)g(information)e(is)j(lost.)f(If)g(a)n
+(v)n(ailable,)g(you)f(can)h(get)g(the)g(positions)g(of)g(nodes)f(by)h
+(in)m(v)n(oking)e(the)479 3796 y Fq(position)i Fv(method.)e(\(Def)o
+(ault:)i(true\))p Black 396 3904 a Ft(\225)p Black 60
+w Fq(idref_pass:)p Fv(If)e(true)i(and)g(if)g(there)g(is)h(an)f(ID)h
+(inde)o(x,)e(the)h(parser)f(checks)h(whether)f(e)n(v)o(ery)g(IDREF)i
+(or)e(IDREFS)479 4012 y(attrib)n(ute)h(refer)g(to)g(an)g(e)o(xisting)f
+(node;)h(this)g(requires)g(that)g(the)g(parser)g(tra)n(v)o(erses)g(the)
+g(whole)f(doument)g(tree.)h(If)479 4120 y(f)o(alse,)h(this)f(check)g
+(is)h(left)f(out.)g(\(Def)o(ault:)g(f)o(alse\))p Black
+396 4228 a Ft(\225)p Black 60 w Fq(validate_by_dfa:)p
+Fv(If)e(true)h(and)h(if)h(the)f(content)f(model)g(for)h(an)g(element)g
+(type)f(is)i(deterministic,)e(a)479 4336 y(deterministic)h(\002nite)g
+(automaton)e(is)j(used)f(to)h(v)n(alidate)e(whether)g(the)i(element)e
+(contents)h(match)f(the)i(content)479 4444 y(model)e(of)h(the)g(type.)g
+(If)g(f)o(alse,)g(or)g(if)g(a)g(DF)-6 b(A)21 b(is)g(not)f(a)n(v)n
+(ailable,)f(a)h(backtracking)e(algorithm)g(is)j(used)f(for)f(v)n
+(alidation.)479 4552 y(\(Def)o(ault:)h(true\))p Black
+396 4659 a Ft(\225)p Black 60 w Fq(accept_only_deterministic_models:)15
+b Fv(If)21 b(true,)e(only)h(deterministic)f(content)g(models)h(are)g
+(accepted;)f(if)479 4767 y(f)o(alse,)i(an)o(y)e(syntactically)h
+(correct)f(content)g(models)h(can)g(be)g(processed.)f(\(Def)o(ault:)g
+(true\))p Black 3800 5278 a Fr(92)p Black eop
+%%Page: 93 93
+93 92 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black -2 583 a Fp(4.4.4.)35
+b(Whic)o(h)f(con\002guration)g(should)g(I)f(use?)396
+751 y Fv(First,)21 b(I)f(recommend)e(to)i(v)n(ary)g(the)g(def)o(ault)f
+(con\002guration)f(instead)i(of)g(creating)f(a)i(ne)n(w)f
+(con\002guration)d(record.)i(F)o(or)396 859 y(instance,)h(to)g(set)h
+Fq(idref_pass)e Fv(to)i Fq(true)p Fv(,)e(change)g(the)i(def)o(ault)e
+(as)i(in:)396 1039 y Fq(let)45 b(config)e(=)i({)g(default_config)d
+(with)i(idref_pass)g(=)g(true)g(})396 1230 y Fv(The)20
+b(background)d(is)k(that)f(I)h(can)f(add)f(more)h(options)f(to)h(the)g
+(record)f(in)i(future)e(v)o(ersions)g(of)h(the)g(parser)f(without)396
+1338 y(breaking)g(your)f(programs.)396 1487 y Fu(Do)i(I)i(need)e(extra)
+f(nodes)i(f)n(or)f(pr)o(ocessing)g(instructions?)g Fv(By)g(def)o(ault,)
+g(such)g(nodes)f(are)h(not)g(created.)f(This)i(does)396
+1595 y(not)f(mean)g(that)g(the)g(processing)f(instructions)g(are)h
+(lost;)h(ho)n(we)n(v)o(er)m(,)d(you)h(cannot)g(\002nd)h(out)g(the)g(e)o
+(xact)g(location)f(where)396 1703 y(the)o(y)h(occur)-5
+b(.)19 b(F)o(or)h(e)o(xample,)e(the)j(follo)n(wing)d(XML)i(te)o(xt)396
+1883 y Fq( )396 2074 y Fv(will)h(normally)e
+(create)h(one)f(element)h(node)f(for)h Fq(x)g Fv(containing)e
+Fr(one)i Fv(subnode)f(for)g Fq(y)p Fv(.)h(The)g(processing)f
+(instructions)396 2182 y(are)h(attached)g(to)g Fq(x)h
+Fv(in)f(a)h(separate)e(hash)h(table;)h(you)e(can)h(access)h(them)e
+(using)h Fq(x)45 b(#)f(pinstr)g("pi1")20 b Fv(and)g Fq(x)44
+b(#)396 2290 y(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o(ely)-5
+b(.)18 b(The)i(information)d(is)k(lost)g(where)f(the)g(instructions)f
+(occur)g(within)h Fq(x)p Fv(.)396 2439 y(If)g(the)h(option)d
+Fq(enable_pinstr_nodes)g Fv(is)j(turned)e(on,)h(the)g(parser)f(creates)
+i(e)o(xtra)e(nodes)g Fq(pi1)i Fv(and)e Fq(pi2)i Fv(such)f(that)396
+2547 y(the)g(subnodes)f(of)h Fq(x)h Fv(are)f(no)n(w:)396
+2728 y Fq(x)45 b(#)g(sub_nodes)e(=)i([)f(pi1;)g(y;)h(pi2)f(])396
+2919 y Fv(The)20 b(e)o(xtra)g(nodes)f(contain)g(the)h(processing)f
+(instructions)g(in)i(the)f(usual)g(w)o(ay)-5 b(,)20 b(i.e.)g(you)f(can)
+h(access)h(them)f(using)f Fq(pi1)396 3026 y(#)45 b(pinstr)f("pi1")20
+b Fv(and)f Fq(pi2)45 b(#)f(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o
+(ely)-5 b(.)396 3176 y(Note)20 b(that)h(you)e(will)i(need)e(an)i(e)o(x)
+o(emplar)d(for)h(the)i(PI)f(nodes)g(\(see)g Fq(make_spec_from_alist)p
+Fv(\).)396 3325 y Fu(Do)g(I)i(need)e(a)h(super)g(r)o(oot)d(node?)i
+Fv(By)h(def)o(ault,)e(there)h(is)h(no)f(super)f(root)h(node.)f(The)h
+Fq(document)f Fv(object)h(refers)396 3433 y(directly)g(to)g(the)g(node)
+f(representing)f(the)j(root)e(element)h(of)g(the)g(document,)e(i.e.)396
+3613 y Fq(doc)45 b(#)f(root)g(=)h(r)396 3804 y Fv(if)21
+b Fq(r)f Fv(is)h(the)g(root)e(node.)g(This)h(is)i(sometimes)d(incon)m
+(v)o(enient:)f(\(1\))h(Some)h(algorithms)f(become)g(simpler)h(if)g(e)n
+(v)o(ery)f(node)396 3912 y(has)i(a)f(parent,)f(e)n(v)o(en)g(the)i(root)
+e(node.)g(\(2\))h(Some)g(standards)f(such)h(as)h(XP)o(ath)f(call)g(the)
+h("root)e(node")g(the)h(node)f(whose)396 4020 y(child)h(represents)f
+(the)i(root)e(of)h(the)g(document.)e(\(3\))i(The)g(super)f(root)h(node)
+f(can)h(serv)o(e)f(as)i(a)g(container)e(for)g(processing)396
+4128 y(instructions)g(outside)h(the)g(root)g(element.)f(Because)i(of)e
+(these)i(reasons,)e(it)i(is)g(possible)f(to)h(create)f(an)g(e)o(xtra)f
+(super)h(root)396 4236 y(node,)f(whose)h(child)g(is)h(the)f(root)g
+(node:)396 4416 y Fq(doc)45 b(#)f(root)g(=)h(sr)403 b(&&)396
+4513 y(sr)45 b(#)f(sub_nodes)g(=)g([)h(r)g(])396 4704
+y Fv(When)20 b(e)o(xtra)g(nodes)f(are)h(also)h(created)e(for)h
+(processing)f(instructions,)g(these)h(nodes)f(can)h(be)h(added)e(to)h
+(the)g(super)g(root)396 4812 y(node)f(if)h(the)o(y)e(occur)h(outside)g
+(the)g(root)g(element)g(\(reason)f(\(3\)\),)h(and)g(the)g(order)g
+(re\003ects)g(the)h(order)e(in)i(the)f(source)g(te)o(xt.)p
+Black 3800 5278 a Fr(93)p Black eop
+%%Page: 94 94
+94 93 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Note)g(that)h(you)e
+(will)i(need)e(an)i(e)o(x)o(emplar)d(for)h(the)i(super)e(root)h(node)f
+(\(see)h Fq(make_spec_from_alist)p Fv(\).)396 728 y Fu(What)g(is)h(the)
+g(effect)e(of)h(the)h(UTF-8)e(encoding?)h Fv(By)h(def)o(ault,)e(the)h
+(parser)g(represents)f(strings)h(\(with)g(fe)n(w)396
+836 y(e)o(xceptions\))e(as)j(ISO-8859-1)c(strings.)i(These)h(are)g
+(well-kno)n(wn,)d(and)j(there)f(are)h(tools)g(and)f(fonts)g(for)h(this)
+g(encoding.)396 986 y(Ho)n(we)n(v)o(er)m(,)e(internationalization)g
+(may)h(require)g(that)i(you)e(switch)h(o)o(v)o(er)f(to)i(UTF-8)e
+(encoding.)f(In)i(most)396 1094 y(en)m(vironments,)d(the)k(immediate)e
+(ef)n(fect)h(will)g(be)h(that)f(you)f(cannot)g(read)h(strings)g(with)g
+(character)f(codes)h(>=)h(160)e(an)o(y)396 1202 y(longer;)g(your)g
+(terminal)h(will)h(only)e(sho)n(w)h(funn)o(y)e(glyph)h(combinations.)f
+(It)i(is)h(strongly)e(recommended)e(to)k(install)396
+1310 y(Unicode)e(fonts)h(\(GNU)g(Unifont)f
+(\(http://czyborra.com/unifon)o(t/\),)c(Markus)k(K)o(uhn')-5
+b(s)19 b(fonts)396 1417 y(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/do)
+m(wnlo)o(ad/u)o(cs-fo)o(nts.tar)g(.g)o(z\)\))14 b(and)20
+b(terminal)f(emulators)h(that)g(can)g(handle)396 1525
+y(UTF-8)g(byte)g(sequences)f(\(http://myweb)m(.clark.net/pub/d)o(ick)o
+(e)o(y)o(/xter)o(m/x)o(term.)o(html\))o(.)c(Furthermore,)i(a)k(Unicode)
+396 1633 y(editor)f(may)f(be)i(helpful)e(\(such)g(as)i(Y)-9
+b(udit)20 b(\(ftp://metalab)m(.unc.edu/pub)o(/Linu)o(x/ap)o(ps/ed)o
+(itors/X/\)\))o(.)15 b(There)k(are)h(also)396 1741 y(F)-6
+b(A)h(Q)21 b(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/unico)o(de)o
+(.htm)o(l\))15 b(by)20 b(Markus)f(K)o(uhn.)396 1891 y(By)i(setting)f
+Fq(encoding)f Fv(to)i Fq(`Enc_utf8)e Fv(all)i(strings)f(originating)e
+(from)h(the)i(parsed)e(XML)h(document)e(are)396 1999
+y(represented)h(as)i(UTF-8)e(strings.)h(This)h(includes)e(not)h(only)f
+(character)g(data)h(and)g(attrib)n(ute)g(v)n(alues)g(b)n(ut)g(also)g
+(element)396 2107 y(names,)g(attrib)n(ute)g(names)g(and)f(so)i(on,)e
+(as)i(it)g(is)g(possible)f(to)h(use)f(an)o(y)f(Unicode)g(letter)i(to)f
+(form)f(such)h(names.)g(Strictly)396 2214 y(speaking,)f(PXP)i(is)g
+(only)e(XML-compliant)f(if)j(the)f(UTF-8)g(mode)f(is)i(used;)f
+(otherwise)g(it)h(will)g(ha)n(v)o(e)e(dif)n(\002culties)396
+2322 y(when)h(v)n(alidating)f(documents)f(containing)g
+(non-ISO-8859-1-names.)396 2472 y(This)j(mode)e(does)h(not)g(ha)n(v)o
+(e)f(an)o(y)h(impact)f(on)h(the)g(e)o(xternal)f(representation)f(of)i
+(documents.)f(The)g(character)g(set)396 2580 y(assumed)h(when)g
+(reading)e(a)j(document)d(is)j(set)g(in)g(the)f(XML)g(declaration,)e
+(and)i(character)f(set)i(when)e(writing)h(a)396 2688
+y(document)e(must)j(be)f(passed)g(to)g(the)g Fq(write)g
+Fv(method.)396 2837 y Fu(Ho)o(w)g(do)h(I)g(check)f(that)g(nodes)h
+(exist)f(which)h(ar)o(e)e(r)o(eferr)o(ed)g(by)i(IDREF)g(attrib)n(utes?)
+e Fv(First,)i(you)e(must)h(create)g(an)396 2945 y(inde)o(x)f(of)h(all)h
+(occurring)d(ID)i(attrib)n(utes:)396 3125 y Fq(let)45
+b(index)f(=)g(new)h(hash_index)396 3316 y Fv(This)21
+b(inde)o(x)e(must)h(be)g(passed)g(to)g(the)h(parsing)e(function:)396
+3496 y Fq(parse_document_entity)486 3593 y(~id_index:\(index)42
+b(:>)j(index\))486 3691 y(config)f(source)g(spec)396
+3882 y Fv(Ne)o(xt,)20 b(you)f(must)h(turn)g(on)g(the)g
+Fq(idref_pass)f Fv(mode:)396 4062 y Fq(let)45 b(config)e(=)i({)g
+(default_config)d(with)i(idref_pass)g(=)g(true)g(})396
+4253 y Fv(Note)20 b(that)h(no)n(w)e(the)i(whole)e(document)f(tree)j
+(will)g(be)f(tra)n(v)o(ersed,)f(and)g(e)n(v)o(ery)g(node)g(will)i(be)f
+(check)o(ed)f(for)h(IDREF)g(and)396 4361 y(IDREFS)h(attrib)n(utes.)f
+(If)g(the)g(tree)g(is)h(big,)f(this)h(may)f(tak)o(e)g(some)g(time.)396
+4510 y Fu(What)g(ar)o(e)g(deterministic)g(content)g(models?)g
+Fv(These)g(type)g(of)g(models)g(can)g(speed)f(up)h(the)g(v)n(alidation)
+f(checks;)396 4618 y(furthermore)f(the)o(y)h(ensure)g
+(SGML-compatibility)-5 b(.)18 b(In)i(particular)m(,)e(a)j(content)e
+(model)g(is)i(deterministic)e(if)i(the)f(parser)396 4726
+y(can)g(determine)f(the)h(actually)g(used)g(alternati)n(v)o(e)f(by)g
+(inspecting)g(only)h(the)g(current)f(tok)o(en.)g(F)o(or)h(e)o(xample,)e
+(this)396 4834 y(element)i(has)g(non-deterministic)e(contents:)p
+Black 3800 5278 a Fr(94)p Black eop
+%%Page: 95 95
+95 94 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fq()396 770 y Fv(If)20
+b(the)h(\002rst)f(element)g(in)g Fq(x)h Fv(is)g Fq(u)p
+Fv(,)f(the)h(parser)e(does)h(not)g(kno)n(w)f(which)h(of)g(the)g
+(alternati)n(v)o(es)f Fq(\(u,v\))h Fv(or)g Fq(\(u,y+\))g
+Fv(will)396 878 y(w)o(ork;)g(the)g(parser)g(must)g(also)g(inspect)g
+(the)h(second)e(element)g(to)i(be)f(able)g(to)g(distinguish)g(between)f
+(the)h(alternati)n(v)o(es.)396 986 y(Because)h(such)f(look-ahead)d
+(\(or)j("guessing"\))e(is)k(required,)c(this)i(e)o(xample)f(is)i
+(non-deterministic.)396 1135 y(The)f(XML)g(standard)f(demands)g(that)i
+(content)e(models)g(must)i(be)f(deterministic.)f(So)h(it)h(is)g
+(recommended)c(to)k(turn)e(the)396 1243 y(option)g Fq
+(accept_only_deterministic_models)d Fv(on;)j(ho)n(we)n(v)o(er)m(,)f
+(PXP)j(can)f(also)h(process)e(non-deterministic)396 1351
+y(models)h(using)g(a)g(backtracking)e(algorithm.)396
+1500 y(Deterministic)i(models)g(ensure)f(that)h(v)n(alidation)f(can)h
+(be)g(performed)e(in)i(linear)g(time.)g(In)g(order)f(to)h(get)g(the)396
+1608 y(maximum)f(bene\002ts,)h(PXP)h(also)f(implements)f(a)i(special)f
+(v)n(alidator)f(that)h(pro\002ts)g(from)f(deterministic)h(models;)f
+(this)396 1716 y(is)i(the)g(deterministic)e(\002nite)h(automaton)f
+(\(DF)-6 b(A\).)19 b(This)i(v)n(alidator)d(is)k(enabled)d(per)g
+(element)h(type)g(if)g(the)g(element)396 1824 y(type)g(has)g(a)h
+(deterministic)e(model)h(and)f(if)i(the)f(option)f Fq(validate_by_dfa)f
+Fv(is)j(turned)e(on.)396 1974 y(In)h(general,)f(I)h(e)o(xpect)g(that)g
+(the)g(DF)-6 b(A)21 b(method)e(is)i(f)o(aster)f(than)g(the)g
+(backtracking)e(method;)g(especially)i(in)h(the)f(w)o(orst)396
+2082 y(case)h(the)f(DF)-6 b(A)21 b(tak)o(es)f(only)g(linear)f(time.)i
+(Ho)n(we)n(v)o(er)m(,)d(if)i(the)g(content)g(model)f(has)h(only)g(fe)n
+(w)g(alternati)n(v)o(es)f(and)h(the)396 2190 y(alternati)n(v)o(es)f(do)
+h(not)g(nest,)g(the)h(backtracking)c(algorithm)i(may)g(be)i(better)-5
+b(.)-2 2691 y Fx(4.5.)39 b(Updates)396 2871 y Fr(Some)20
+b(\(often)f(later)i(added\))d(featur)m(es)i(that)g(ar)m(e)h(otherwise)f
+(not)g(e)n(xplained)f(in)h(the)h(manual)d(b)n(ut)j(worth)f(to)g(be)396
+2979 y(mentioned.)p Black 396 3211 a Ft(\225)p Black
+60 w Fv(Methods)g(node_position,)d(node_path,)g(nth_node,)h(pre)n
+(vious_node,)e(ne)o(xt_node)h(for)j(nodes:)f(See)479
+3319 y(pxp_document.mli)p Black 396 3427 a Ft(\225)p
+Black 60 w Fv(Functions)h(to)g(determine)f(the)h(document)e(order)h(of)
+h(nodes:)f(compare,)g(create_ord_inde)o(x,)c(ord_number)m(,)479
+3535 y(ord_compare:)i(See)k(pxp_document.mli)p Black
+3800 5278 a Fr(95)p Black eop
+%%Page: 96 96
+96 95 bop Black Black Black Black eop
+%%Trailer
+end
+userdict /end-hook known{end-hook}if
+%%EOF
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent
new file mode 100644
index 000000000..f2e0eb85c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent
@@ -0,0 +1,374 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml b/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml
new file mode 100755
index 000000000..4db669036
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml
@@ -0,0 +1,87 @@
+#! /bin/sh
+# (*
+exec ocamlfattop "$0"
+*) directory ".";;
+
+(* getcode: reads OCaml source text on stdin and prints SGML entity
+ * declarations on stdout.
+ *
+ * A line starting with a marker comment (an OCaml comment whose text is a
+ * dollar sign followed by NAME) opens the entity NAME; the following lines,
+ * with markup characters escaped, become its replacement text until the next
+ * marker.  The marker name "-" closes the current entity without opening a
+ * new one.  Lines outside of any entity are dropped.
+ *)
+
+open Str;;
+
+(* Matches a marker at the start of a line; group 1 is the entity name. *)
+let name_re = regexp "(\\*\\$[ \t]*\\([a-zA-Z0-9.-]*\\)[ \t]*\\*)";;
+
+(* The characters that escape_char replaces by entity references. *)
+let subst_re = regexp "[<>&'%]";;
+
+(* NOTE(review): the text of the string literals in begin_entity, end_entity
+ * and escape_char (except the percent case) was lost in this copy of the
+ * script and has been reconstructed; confirm against the upstream getcode.ml.
+ *)
+
+(* Opening part of the declaration of entity [name], up to and including the
+ * opening quote of the entity literal. *)
+let begin_entity name =
+  "<!ENTITY " ^ name ^ " '"
+;;
+
+(* Closing quote and closing delimiter of an entity declaration. *)
+let end_entity () =
+  "'>\n"
+;;
+
+(* Entity reference replacing one character matched by subst_re. *)
+let escape_char = function
+    "<" -> "&lt;"
+  | ">" -> "&gt;"
+  | "&" -> "&amp;"
+  | "'" -> "&apos;"
+  | "%" -> "&percent;"
+  | _ -> assert false          (* subst_re matches no other character *)
+;;
+
+(* Escapes every reserved character of one input line. *)
+let escape_line line =
+  global_substitute subst_re (fun s -> escape_char (matched_group 0 s)) line
+;;
+
+(* The output is collected in a buffer (appending to a string with ^ copies
+ * the whole text for every input line) and printed once at end of input. *)
+let text = Buffer.create 4096 in
+let within_entity = ref false in
+let close_entity () =
+  if !within_entity then begin
+    Buffer.add_string text "\n";
+    Buffer.add_string text (end_entity());
+    within_entity := false
+  end
+in
+try
+  while true do
+    let line = read_line() in
+    if string_match name_re line 0 then begin
+      (* Fetch the group before anything else can disturb the match data. *)
+      let name = matched_group 1 line in
+      close_entity();
+      if name <> "-" then begin
+        Buffer.add_string text (begin_entity name);
+        within_entity := true
+      end
+    end
+    else if !within_entity then begin
+      Buffer.add_string text "\n";
+      Buffer.add_string text (escape_line line)
+    end
+  done
+with End_of_file ->
+  (* read_line signals the end of stdin; flush a still open entity. *)
+  close_entity();
+  print_string (Buffer.contents text)
+;;
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css
new file mode 100644
index 000000000..67dfaecb7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css
@@ -0,0 +1,4 @@
+.acronym { /* Highlighting for elements of class "acronym": bold text in a violet-red colour. */
+ font-weight: bold;
+ color: #c71585
+}
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl
new file mode 100644
index 000000000..cd9b1e2bf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl
@@ -0,0 +1,74 @@
+
+
+
+
+
+]]>
+
+
+]]>
+]>
+
+
+
+
+;; HTML:
+
+
+
+;; printing:
+
+
+
+;; both:
+
+(define %section-autolabel%
+ ;; Are sections enumerated?  #t turns the enumeration on.
+ ;; NOTE(review): the definition stands under the "both:" heading, so it is
+ ;; presumably meant for the HTML and the printing style alike -- confirm.
+ #t)
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml
new file mode 100644
index 000000000..1cb2064cb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml
@@ -0,0 +1,5109 @@
+PXP">
+PXP">
+
+
+
+
+
+%readme.code.to-html;
+%get.markup-yacc.mli;
+%get.markup-dtd.mli;
+
+
+
+]>
+
+
+
+
+ The PXP user's guide
+
+
+
+
+ Gerd
+ Stolpmann
+
+
+
+ gerd@gerd-stolpmann.de
+
+
+
+
+
+
+
+ 1999, 2000 Gerd Stolpmann
+
+
+
+
+
+&markup; is a validating parser for XML-1.0 which has been
+written entirely in Objective Caml.
+
+
+ Download &markup;:
+
+The free &markup; library can be downloaded at
+
+http://www.ocaml-programming.de/packages/
+ . This user's guide is included.
+Newest releases of &markup; will be announced in
+The OCaml Link
+Database .
+
+
+
+
+
+ License
+
+This document, and the described software, "&markup;", are copyright by
+Gerd Stolpmann.
+
+
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "&markup;" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
+
+
+
+
+
+
+
+
+
+ User's guide
+
+
+ What is XML?
+
+
+ Introduction
+
+ XML (short for Extensible Markup Language )
+generalizes the idea that text documents are typically structured in sections,
+sub-sections, paragraphs, and so on. The format of the document is not fixed
+(as, for example, in HTML), but can be declared by a so-called DTD (document
+type definition). The DTD describes only the rules how the document can be
+structured, but not how the document can be processed. For example, if you want
+to publish a book that uses XML markup, you will need a processor that converts
+the XML file into a printable format such as Postscript. On the one hand, the
+structure of XML documents is configurable; on the other hand, there is no
+longer a canonical interpretation of the elements of the document; for example
+one XML DTD might require that paragraphs are delimited by
+para tags, and another DTD expects p tags
+for the same purpose. As a result, for every DTD a new processor is required.
+
+
+
+Although XML can be used to express structured text documents it is not limited
+to this kind of application. For example, XML can also be used to exchange
+structured data over a network, or to simply store structured data in
+files. Note that XML documents cannot contain arbitrary binary data because
+some characters are forbidden; for some applications you need to encode binary
+data as text (e.g. the base 64 encoding).
+
+
+
+
+ The "hello world" example
+
+The following example shows a very simple DTD, and a corresponding document
+instance. The document is structured such that it consists of sections, and
+that sections consist of paragraphs, and that paragraphs contain plain text:
+
+
+
+
+
+
+]]>
+
+
+ The following document is an instance of this DTD:
+
+
+
+
+
+
+ This is a paragraph of the first section.
+ This is another paragraph of the first section.
+
+
+ This is the only paragraph of the second section.
+
+
+]]>
+
+
+ As in HTML (and, of course, in grand-father SGML), the "pieces" of
+the document are delimited by element braces, i.e. such a piece begins with
+<name-of-the-type-of-the-piece> and ends with
+</name-of-the-type-of-the-piece> , and the pieces are
+called elements . Unlike HTML and SGML, both start tags and
+end tags (i.e. the delimiters written in angle brackets) can never be left
+out. For example, HTML calls the paragraphs simply p , and
+because paragraphs never contain paragraphs, a sequence of several paragraphs
+can be written as:
+
+First paragraph
+Second paragraph]]>
+
+This is not possible in XML; continuing our example above we must always write
+
+First paragraph
+Second paragraph ]]>
+
+The rationale behind that is to (1) simplify the development of XML parsers
+(you need not convert the DTD into a deterministic finite automaton which is
+required to detect omitted tags), and to (2) make it possible to parse the
+document independent of whether the DTD is known or not.
+
+
+
+The first line of our sample document,
+
+
+]]>
+
+
+is the so-called XML declaration . It expresses that the
+document follows the conventions of XML version 1.0, and that the document is
+encoded using characters from the ISO-8859-1 character set (often known as
+"Latin 1", mostly used in Western Europe). Although the XML declaration is not
+mandatory, it is good style to include it; everybody sees at the first glance
+that the document uses XML markup and not the similar-looking HTML and SGML
+markup languages. If you omit the XML declaration, the parser will assume
+that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
+it possible to distinguish between UTF-8 and UTF-16 automatically); these
+are encodings of Unicode's universal character set. (Note that &pxp;, unlike its
+predecessor "Markup", fully supports Unicode.)
+
+
+
+The second line,
+
+
+]]>
+
+
+names the DTD that is going to be used for the rest of the document. In
+general, it is possible that the DTD consists of two parts, the so-called
+external and the internal subset. "External" means that the DTD exists as a
+second file; "internal" means that the DTD is included in the same file. In
+this example, there is only an external subset, and the system identifier
+"simple.dtd" specifies where the DTD file can be found. System identifiers are
+interpreted as URLs; for instance this would be legal:
+
+
+]]>
+
+
+Please note that &pxp; cannot interpret HTTP identifiers by default, but it is
+possible to change the interpretation of system identifiers.
+
+
+
+The word immediately following DOCTYPE determines which of
+the declared element types (here "document", "section", and "paragraph") is
+used for the outermost element, the root element . In this
+example it is document because the outermost element is
+delimited by <document> and
+</document> .
+
+
+
+The DTD consists of three declarations for element types:
+document , section , and
+paragraph . Such a declaration has two parts:
+
+
+<!ELEMENT name content-model >
+
+
+The content model is a regular expression which describes the possible inner
+structure of the element. Here, document contains one or
+more sections, and a section contains one or more
+paragraphs. Note that these two element types are not allowed to contain
+arbitrary text. Only the paragraph element type is declared
+such that parsed character data (indicated by the symbol
+#PCDATA ) is permitted.
+
+
+
+See below for a detailed discussion of content models.
+
+
+
+
+ XML parsers and processors
+
+XML documents are human-readable, but this is not the main purpose of this
+language. XML has been designed such that documents can be read by a program
+called an XML parser . The parser checks that the document
+is well-formatted, and it represents the document as objects of the programming
+language. There are two aspects when checking the document: First, the document
+must follow some basic syntactic rules, such as that tags are written in angle
+brackets, that for every start tag there must be a corresponding end tag and so
+on. A document respecting these rules is
+well-formed . Second, the document must match the DTD in
+which case the document is valid . Many parsers check only
+on well-formedness and ignore the DTD; &pxp; is designed such that it can
+even validate the document.
+
+
+
+A parser does not make a sensible application, it only reads XML
+documents. The whole application working with XML-formatted data is called an
+XML processor . Often XML processors convert documents into
+another format, such as HTML or Postscript. Sometimes processors extract data
+of the documents and output the processed data again XML-formatted. The parser
+can help the application processing the document; for example it can provide
+means to access the document in a specific manner. &pxp; supports an
+object-oriented access layer specially.
+
+
+
+
+ Discussion
+
+As we have seen, there are two levels of description: On the one hand, XML can
+define rules about the format of a document (the DTD), on the other hand, XML
+expresses structured documents. There are a number of possible applications:
+
+
+
+
+
+XML can be used to express structured texts. Unlike HTML, there is no canonical
+interpretation; one would have to write a backend for the DTD that translates
+the structured texts into a format that existing browsers, printers
+etc. understand. The advantage of a self-defined document format is that it is
+possible to design the format in a more problem-oriented way. For example, if
+the task is to extract reports from a database, one can use a DTD that reflects
+the structure of the report or the database. A possible approach would be to
+have an element type for every database table and for every column. Once the
+DTD has been designed, the report procedure can be split up into a part that
+selects the database rows and outputs them as an XML document according to the
+DTD, and a part that translates the document into other formats. Of course,
+the latter part can be solved in a generic way, e.g. there may be configurable
+backends for all DTDs that follow the approach and have element types for
+tables and columns.
+
+
+
+XML plays the role of a configurable intermediate format. The database
+extraction function can be written without having to know the details of
+typesetting; the backends can be written without having to know the details of
+the database.
+
+
+
+Of course, there are traditional solutions. One can define an ad hoc
+intermediate text file format. The disadvantage is that there are no names for
+the pieces of the format, and that such formats usually lack documentation
+because of this. Another solution would be to have a binary representation,
+either as language-dependent or language-independent structure (example of the
+latter can be found in RPC implementations). The disadvantage is that it is
+harder to view such representations, one has to write pretty printers for this
+purpose. It is also more difficult to enter test data; XML is plain text that
+can be written using an arbitrary editor (Emacs has even a good XML mode,
+PSGML). All these alternatives suffer from a missing structure checker,
+i.e. the programs processing these formats usually do not check the input file
+or input object in detail; XML parsers check the syntax of the input (the
+so-called well-formedness check), and the advanced parsers like &markup; even
+verify that the structure matches the DTD (the so-called validation).
+
+
+
+
+
+
+XML can be used as configurable communication language. A fundamental problem
+of every communication is that sender and receiver must follow the same
+conventions about the language. For data exchange, the question is usually
+which data records and fields are available, how they are syntactically
+composed, and which values are possible for the various fields. Similar
+questions arise for text document exchange. XML does not answer these problems
+completely, but it reduces the number of ambiguities for such conventions: The
+outlines of the syntax are specified by the DTD (but not necessarily the
+details), and XML introduces canonical names for the components of documents
+such that it is simpler to describe the rest of the syntax and the semantics
+informally.
+
+
+
+
+
+XML is a data storage format. Currently, every software product tends to use
+its own way to store data; commercial software often does not describe such
+formats, and it is a pain to integrate such software into a bigger project.
+XML can help to improve this situation when several applications share the same
+syntax of data files. DTDs are then neutral instances that check the format of
+data files independent of applications.
+
+
+
+
+
+
+
+
+
+
+
+
+ Highlights of XML
+
+
+This section explains many of the features of XML, but not all, and some
+features not in detail. For a complete description, see the XML
+specification .
+
+
+
+ The DTD and the instance
+
+The DTD contains various declarations; in general you can only use a feature if
+you have previously declared it. The document instance file may contain the
+full DTD, but it is also possible to split the DTD into an internal and an
+external subset. A document must begin as follows if the full DTD is included:
+
+
+<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root [
+ Declarations
+]>
+
+
+These declarations are called the internal subset . Note
+that the usage of entities and conditional sections is restricted within the
+internal subset.
+
+
+If the declarations are located in a different file, you can refer to this file
+as follows:
+
+
+<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root SYSTEM "file name ">
+
+
+The declarations in the file are called the external
+subset . The file name is called the system
+identifier .
+It is also possible to refer to the file by a so-called
+public identifier , but most XML applications won't use
+this feature.
+
+
+You can also specify both internal and external subsets. In this case, the
+declarations of both subsets are mixed, and if there are conflicts, the
+declaration of the internal subset overrides those of the external subset with
+the same name. This looks as follows:
+
+
+<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root SYSTEM "file name " [
+ Declarations
+]>
+
+
+
+
+The XML declaration (the string beginning with <?xml and
+ending at ?> ) should specify the encoding of the
+file. Common values are UTF-8, and the ISO-8859 series of character sets. Note
+that every file parsed by the XML processor can begin with an XML declaration
+and that every file may have its own encoding.
+
+
+
+The name of the root element must be mentioned directly after the
+DOCTYPE string. This means that a full document instance
+looks like
+
+
+<?xml version="1.0" encoding="Your encoding "?>
+<!DOCTYPE root SYSTEM "file name " [
+ Declarations
+]>
+
+<root >
+ inner contents
+</root >
+
+
+
+
+
+
+
+ Reserved characters
+
+Some characters are generally reserved to indicate markup such that they cannot
+be used for character data. These characters are <, >, and
+&. Furthermore, single and double quotes are sometimes reserved. If you
+want to include such a character as character, write it as follows:
+
+
+
+
+&lt; instead of <
+
+
+
+
+&gt; instead of >
+
+
+
+
+&amp; instead of &
+
+
+
+
+&apos; instead of '
+
+
+
+
+&quot; instead of "
+
+
+
+
+All other characters are free in the document instance. It is possible to
+include a character by its position in the Unicode alphabet:
+
+
+&#n ;
+
+
+where n is the decimal number of the
+character. Alternatively, you can specify the character by its hexadecimal
+number:
+
+
+&#xn ;
+
+
+In the scope of declarations, the character % is no longer free. To include it
+as character, you must use the notations &#37; or
+&#x25; .
+
+
+ Note that besides &lt;, &gt;, &amp;,
+&apos;, and &quot; there are no predefined character entities. This is
+different from HTML which defines a list of characters that can be referenced
+by name (e.g. &auml; for ä); however, if you prefer named characters, you
+can declare such entities yourself (see below).
+
+
+
+
+
+
+ Elements and ELEMENT declarations
+
+
+Elements structure the document instance in a hierarchical way. There is a
+top-level element, the root element , which contains a
+sequence of inner elements and character sections. The inner elements are
+structured in the same way. Every element has an element
+type . The beginning of the element is indicated by a start
+tag , written
+
+
+<element-type >
+
+
+and the element continues until the corresponding end tag
+is reached:
+
+
+</element-type >
+
+
+In XML, it is not allowed to omit start or end tags, even if the DTD would
+permit this. Note that there are no special rules how to interpret spaces or
+newlines near start or end tags; all spaces and newlines count.
+
+
+
+Every element type must be declared before it can be used. The declaration
+consists of two parts: the ELEMENT declaration describes the content model,
+i.e. which inner elements are allowed; the ATTLIST declaration describes the
+attributes of the element.
+
+
+
+An element can simply allow everything as content. This is written:
+
+
+<!ELEMENT name ANY>
+
+
+Conversely, an element can be forced to be empty; this is declared by:
+
+
+<!ELEMENT name EMPTY>
+
+
+Note that there is an abbreviated notation for empty element instances:
+<name /> .
+
+
+
+There are two more sophisticated forms of declarations: so-called
+mixed declarations , and regular
+expressions . An element with mixed content contains character data
+interspersed with inner elements, and the set of allowed inner elements can be
+specified. In contrast to this, a regular expression declaration does not allow
+character data, but the inner elements can be described by the more powerful
+means of regular expressions.
+
+
+
+A declaration for mixed content looks as follows:
+
+
+<!ELEMENT name (#PCDATA | element1 | ... | elementn )*>
+
+
+or if you do not want to allow any inner element, simply
+
+
+<!ELEMENT name (#PCDATA)>
+
+
+
+
+
+ Example
+
+If element type q is declared as
+
+
+]]>
+
+
+this is a legal instance:
+
+
+This is character data with inner elements ]]>
+
+
+But this is illegal because t has not been enumerated in the
+declaration:
+
+
+This is character data with inner elements ]]>
+
+
+
+
+
+The other form uses a regular expression to describe the possible contents:
+
+
+<!ELEMENT name regexp >
+
+
+The following well-known regexp operators are allowed:
+
+
+
+
+element-name
+
+
+
+
+
+(subexpr1 , ... , subexprn )
+
+
+
+
+
+(subexpr1 | ... | subexprn )
+
+
+
+
+
+subexpr *
+
+
+
+
+
+subexpr +
+
+
+
+
+
+subexpr ?
+
+
+
+
+The , operator indicates a sequence of sub-models, the
+| operator describes alternative sub-models. The
+* indicates zero or more repetitions, and
++ one or more repetitions. Finally, ? can
+be used for optional sub-models. As atoms the regexp can contain names of
+elements; note that it is not allowed to include #PCDATA .
+
+
+
+The exact syntax of the regular expressions is rather strange. This can be
+explained best by a list of constraints:
+
+
+
+
+The outermost expression must not be
+element-name .
+
+ Illegal:
+]]> ; this must be written as
+]]> .
+
+
+
+For the unary operators subexpr * ,
+subexpr + , and
+subexpr ? , the
+subexpr must not itself be a
+unary operator.
+
+ Illegal:
+]]> ; this must be written as
+]]> .
+
+
+
+Between ) and one of the unary operators
+* , + , or ? , there must
+not be whitespace.
+ Illegal:
+]]> ; this must be written as
+]]> .
+
+ There is the additional constraint that the
+right parenthesis must be contained in the same entity as the left parenthesis;
+see the section about parsed entities below.
+
+
+
+
+
+
+Note that there is another restriction on regular expressions which must be
+deterministic. This means that the parser must be able to see by looking at the
+next token which alternative is actually used, or whether the repetition
+stops. The reason for this is simply compatibility with SGML (there is no
+intrinsic reason for this rule; XML can live without this restriction).
+
+
+
+ Example
+
+The elements are declared as follows:
+
+
+
+
+
+
+]]>
+
+This is a legal instance:
+
+
+Some characters ]]>
+
+
+(Note: <s/> is an abbreviation for
+<s></s> .)
+
+It would be illegal to leave ]]> out because at
+least one instance of s or t must be
+present. It would be illegal, too, if characters existed outside the
+r element; the only exception is white space. -- This is
+legal, too:
+
+
+ ]]>
+
+
+
+
+
+
+
+
+
+ Attribute lists and ATTLIST declarations
+
+Elements may have attributes. These are put into the start tag of an element as
+follows:
+
+
+<element-name attribute1 ="value1 " ... attributen ="valuen ">
+
+
+Instead of
+"valuek "
+it is also possible to use single quotes as in
+'valuek ' .
+Note that you cannot use double quotes literally within the value of the
+attribute if double quotes are the delimiters; the same applies to single
+quotes. You can generally not use < and & as characters in attribute
+values. It is possible to include the paraphrases &lt;, &gt;,
+&amp;, &apos;, and &quot; (and any other reference to a general
+entity as long as the entity is not defined by an external file) as well as
+&#n ;.
+
+
+
+Before you can use an attribute you must declare it. An ATTLIST declaration
+looks as follows:
+
+
+<!ATTLIST element-name
+ attribute-name attribute-type attribute-default
+ ...
+ attribute-name attribute-type attribute-default
+>
+
+
+There are a lot of types, but most important are:
+
+
+
+
+CDATA : Every string is allowed as attribute value.
+
+
+
+
+NMTOKEN : Every nametoken is allowed as attribute
+value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
+order.
+
+
+
+
+NMTOKENS : A space-separated list of nametokens is allowed as
+attribute value.
+
+
+
+
+The most interesting default declarations are:
+
+
+
+
+#REQUIRED : The attribute must be specified.
+
+
+
+
+#IMPLIED : The attribute can be specified but also can be
+left out. The application can find out whether the attribute was present or
+not.
+
+
+
+
+"value " or
+'value ' : This particular value is
+used as default if the attribute is omitted in the element.
+
+
+
+
+
+
+ Example
+
+This is a valid attribute declaration for element type r :
+
+
+
+]]>
+
+This means that x is a required attribute that cannot be
+left out, while y and z are optional. The
+XML parser indicates to the application whether y is present or
+not, but if z is missing the default value
+"one two three" is returned automatically.
+
+
+
+This is a valid example of these attributes:
+
+
+]]>
+
+
+
+
+
+
+
+ Parsed entities
+
+Elements describe the logical structure of the document, while
+entities determine the physical structure. Entities are
+the pieces of text the parser operates on, mostly files and macros. Entities
+may be parsed in which case the parser reads the text and
+interprets it as XML markup, or unparsed which simply
+means that the data of the entity has a foreign format (e.g. a GIF icon).
+
+
+ If the parsed entity is going to be used as part of the DTD, it
+is called a parameter entity . You can declare a parameter
+entity with a fixed text as content by:
+
+
+<!ENTITY % name "value ">
+
+
+Within the DTD, you can refer to this entity, i.e. read
+the text of the entity, by:
+
+
+%name ;
+
+
+Such entities behave like macros, i.e. when they are referred to, the
+macro text is inserted and read instead of the original text.
+
+
+ Example
+
+For example, you can declare two elements with the same content model by:
+
+
+
+
+
+]]>
+
+
+
+
+
+If the contents of the entity are given as string constant, the entity is
+called an internal entity. It is also possible to name a
+file to be used as content (an external entity):
+
+
+<!ENTITY % name SYSTEM "file name ">
+
+
+There are some restrictions for parameter entities:
+
+
+
+
+If the internal parameter entity contains the first token of a declaration
+(i.e. <! ), it must also contain the last token of the
+declaration, i.e. the > . This means that the entity
+either contains a whole number of complete declarations, or some text from the
+middle of one declaration.
+
+Illegal:
+
+">
+ Because <! is contained in the main
+entity, and the corresponding > is contained in the
+entity e .
+
+
+
+If the internal parameter entity contains a left parenthesis, it must also
+contain the corresponding right parenthesis.
+
+Illegal:
+
+
+
+]]> Because ( is contained in the entity
+e , and the corresponding ) is
+contained in the main entity.
+
+
+
+When reading text from an entity, the parser automatically inserts one space
+character before the entity text and one space character after the entity
+text. However, this rule is not applied within the definition of another
+entity.
+Legal:
+
+
+
+]]> Because %suffix; is referenced within
+the definition text for iconfile , no additional spaces are
+added.
+
+Illegal:
+
+
+
+]]>
+Because %suffix; is referenced outside the definition
+text of another entity, the parser replaces %suffix; by
+space testspace .
+Illegal:
+
+
+
+]]> Because there is a whitespace between )
+and * , which is illegal.
+
+
+
+An external parameter entity must always consist of a whole number of complete
+declarations.
+
+
+
+
+In the internal subset of the DTD, a reference to a parameter entity (internal
+or external) is only allowed at positions where a new declaration can start.
+
+
+
+
+
+
+If the parsed entity is going to be used in the document instance, it is called
+a general entity . Such entities can be used as
+abbreviations for frequent phrases, or to include external files. Internal
+general entities are declared as follows:
+
+
+<!ENTITY name "value ">
+
+
+External general entities are declared this way:
+
+
+<!ENTITY name SYSTEM "file name ">
+
+
+References to general entities are written as:
+
+
+&name ;
+
+
+The main difference between parameter and general entities is that the former
+are only recognized in the DTD and that the latter are only recognized in the
+document instance. As the DTD is parsed before the document, the parameter
+entities are expanded first; for example it is possible to use the content of a
+parameter entity as the name of a general entity:
+&%name;; This construct is only
+allowed within the definition of another entity; otherwise extra spaces would
+be added (as explained above). Such indirection is not recommended.
+
+Complete example:
+
+
+
+
+
+]]>
+You can now write &text; in the document instance, and
+depending on the value of variant either
+text-a or text-b is inserted.
+ .
+
+
+General entities must respect the element hierarchy. This means that there must
+be an end tag for every start tag in the entity value, and that end tags
+without corresponding start tags are not allowed.
+
+
+
+ Example
+
+If the author of a document changes sometimes, it is worthwhile to set up a
+general entity containing the names of the authors. If the author changes, you
+need only to change the definition of the entity, and do not need to check all
+occurrences of authors' names:
+
+
+
+]]>
+
+
+In the document text, you can now refer to the author names by writing
+&authors; .
+
+
+
+Illegal:
+The following two entities are illegal because the elements in the definition
+do not nest properly:
+
+
+">
+">
+]]>
+
+
+
+
+Earlier in this introduction we explained that there are substitutes for
+reserved characters: &lt;, &gt;, &amp;, &apos;, and
+&quot;. These are simply predefined general entities; note that they are
+the only predefined entities. It is allowed to define these entities again
+as long as the meaning is unchanged.
+
+
+
+
+ Notations and unparsed entities
+
+Unparsed entities have a foreign format and can thus not be read by the XML
+parser. Unparsed entities are always external. The format of an unparsed entity
+must have been declared, such a format is called a
+notation . The entity can then be declared by referring to
+this notation. As unparsed entities do not contain XML text, it is not possible
+to include them directly into the document; you can only declare attributes
+such that names of unparsed entities are acceptable values.
+
+
+
+As you can see, unparsed entities are too complicated to serve any practical
+purpose. It is almost always better to simply pass the name of the data file as
+normal attribute value, and let the application recognize and process the
+foreign format.
+
+
+
+
+
+
+
+
+
+
+ A complete example: The readme DTD
+
+The reason for readme was that I often wrote two versions
+of files such as README and INSTALL which explain aspects of a distributed
+software archive; one version was ASCII-formatted, the other was written in
+HTML. Maintaining both versions means double the amount of work, and changes
+of one version may be forgotten in the other version. To improve this situation
+I invented the readme DTD which allows me to maintain only
+one source written as XML document, and to generate the ASCII and the HTML
+version from it.
+
+
+
+In this section, I explain only the DTD. The readme DTD is
+contained in the &markup; distribution together with the two converters to
+produce ASCII and HTML. Another section of this manual describes the HTML
+converter.
+
+
+
+The documents have a simple structure: There are up to three levels of nested
+sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
+outermost element has usually the type readme , it is
+declared by
+
+
+
+
+]]>
+
+This means that this element contains one or more sections of the first level
+(element type sect1 ), and that the element has a required
+attribute title containing character data (CDATA). Note that
+readme elements must not contain text data.
+
+
+
+The three levels of sections are declared as follows:
+
+
+
+
+
+
+
+]]>
+
+Every section has a title element as first subelement. After
+the title an arbitrary but non-empty sequence of inner sections, paragraphs and
+item lists follows. Note that the inner sections must belong to the next higher
+section level; sect3 elements must not contain inner
+sections because there is no next higher level.
+
+
+
+Obviously, all three declarations allow paragraphs (p ) and
+item lists (ul ). The definition can be simplified at this
+point by using a parameter entity:
+
+
+
+
+
+
+
+
+
+]]>
+
+Here, the entity p.like is nothing but a macro abbreviating
+the same sequence of declarations; if new elements on the same level as
+p and ul are later added, it is
+sufficient only to change the entity definition. Note that there are some
+restrictions on the usage of entities in this context; most important, entities
+containing a left parenthesis must also contain the corresponding right
+parenthesis.
+
+
+
+Note that the entity p.like is a
+parameter entity, i.e. the ENTITY declaration contains a
+percent sign, and the entity is referred to by
+%p.like; . This kind of entity must be used to abbreviate
+parts of the DTD; the general entities declared without
+percent sign and referred to as &name; are not allowed
+in this context.
+
+
+
+The title element specifies the title of the section in
+which it occurs. The title is given as character data, optionally interspersed
+with line breaks (br ):
+
+
+
+]]>
+
+Compared with the title attribute of
+the readme element, this element allows inner markup
+(i.e. br ) while attribute values do not: It is an error if
+an attribute value contains the left angle bracket < literally such that it
+is impossible to include inner elements.
+
+
+
+The paragraph element p has a structure similar to
+title , but it allows more inner elements:
+
+
+
+
+
+]]>
+
+Line breaks do not have inner structure, so they are declared as being empty:
+
+
+
+]]>
+
+This means that really nothing is allowed within br ; you
+must always write ]]> or abbreviated
+]]> .
+
+
+
+Code samples should be marked up by the code tag; emphasized
+text can be indicated by em :
+
+
+
+
+
+]]>
+
+That code elements are not allowed to contain further markup
+while em elements do is a design decision by the author of
+the DTD.
+
+
+
+Unordered lists simply consist of one or more list items, and a list item may
+contain paragraph-level material:
+
+
+
+
+
+]]>
+
+Footnotes are described by the text of the note; this text may contain
+text-level markup. There is no mechanism to describe the numbering scheme of
+footnotes, or to specify how footnote references are printed.
+
+
+
+]]>
+
+Hyperlinks are written as in HTML. The anchor tag contains the text describing
+where the link points to, and the href attribute is the
+pointer (as URL). There is no way to describe locations of "hash marks". If the
+link refers to another readme document, the attribute
+readmeref should be used instead of href .
+The reason is that the converted document has usually a different system
+identifier (file name), and the link to a converted document must be
+converted, too.
+
+
+
+
+]]>
+
+Note that although it is only sensible to specify one of the two attributes,
+the DTD has no means to express this restriction.
+
+
+
+So far the DTD. Finally, here is a document for it:
+
+
+
+
+
+
+ Usage
+
+ The readme converter is invoked on the command line by:
+
+
+ readme [ -text | -html ] input.xml
+
+
+ Here a list of options:
+
+
+
+ The input file must be given on the command line. The converted output is
+ printed to stdout .
+
+
+
+ Author
+
+ The program has been written by
+ Gerd Stolpmann .
+
+
+
+]]>
+
+
+
+
+
+
+
+
+
+
+ Using &markup;
+
+
+ Validation
+
+The parser can be used to validate a document. This means
+that all the constraints that must hold for a valid document are actually
+checked. Validation is the default mode of &markup;, i.e. every document is
+validated while it is being parsed.
+
+
+
+In the examples directory of the distribution you find the
+pxpvalidate application. It is invoked in the following way:
+
+
+pxpvalidate [ -wf ] file ...
+
+
+The files mentioned on the command line are validated, and every warning and
+every error message is printed to stderr.
+
+
+
+The -wf switch modifies the behaviour such that a well-formedness parser is
+simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
+DTD are ignored, and only the ENTITY declarations will take effect. This mode
+is intended for documents lacking a DTD. Please note that the parser still
+scans the DTD fully and will report all errors in the DTD; such checks are not
+required by a well-formedness parser.
+
+
+
+The pxpvalidate application is the simplest sensible program
+using &markup;; you may consider it the "hello world" program.
+
+
+
+
+
+
+
+
+ How to parse a document from an application
+
+Let me first give a rough overview of the object model of the parser. The
+following items are represented by objects:
+
+
+
+
+Documents: The document representation is more or less the
+anchor for the application; all accesses to the parsed entities start here. It
+is described by the class document contained in the module
+Pxp_document . You can get some global information, such
+as the XML declaration the document begins with, the DTD of the document,
+global processing instructions, and most important, the document tree.
+
+
+
+
+
+The contents of documents: The contents have the structure
+of a tree: Elements contain other elements and text. (Note: Elements may
+also contain processing instructions. Unlike other document models, &markup;
+separates processing instructions from the rest of the text and provides a
+second interface to access them (method pinstr ). However,
+there is a parser option (enable_pinstr_nodes ) which changes
+the behaviour of the parser such that extra nodes for processing instructions
+are included into the tree.
+Furthermore, the tree does normally not contain nodes for XML comments;
+they are ignored by default. Again, there is an option
+(enable_comment_nodes ) changing this.)
+
+The common type to represent both kinds of content is node
+which is a class type that unifies the properties of elements and character
+data. Every node has a list of children (which is empty if the element is empty
+or the node represents text); nodes may have attributes; nodes have always text
+contents. There are two implementations of node , the class
+element_impl for elements, and the class
+data_impl for text data. You find these classes and class
+types in the module Pxp_document , too.
+
+
+
+Note that attribute lists are represented by non-class values.
+
+
+
+
+
+The node extension: For advanced usage, every node of the
+document may have an associated extension which is simply
+a second object. This object must have the three methods
+clone , node , and
+set_node as bare minimum, but you are free to add methods as
+you want. This is the preferred way to add functionality to the document
+tree. (Note: Due to the typing system it is more or less impossible to
+derive recursive classes in O'Caml. To get around this, it is common practice
+to put the modifiable or extensible part of recursive objects into parallel
+objects.) The class type extension is
+defined in Pxp_document , too.
+
+
+
+
+
+The DTD: Sometimes it is necessary to access the DTD of a
+document; the average application does not need this feature. The class
+dtd describes DTDs, and makes it possible to get
+representations of element, entity, and notation declarations as well as
+processing instructions contained in the DTD. This class, and
+dtd_element , dtd_notation , and
+proc_instruction can be found in the module
+Pxp_dtd . There are a couple of classes representing
+different kinds of entities; these can be found in the module
+Pxp_entity .
+
+
+
+
+Additionally, the following modules play a role:
+
+
+
+
+Pxp_yacc: Here the main parsing functions such as
+parse_document_entity are located. Some additional types and
+functions allow the parser to be configured in a non-standard way.
+
+
+
+
+
+Pxp_types: This is a collection of basic types and
+exceptions.
+
+
+
+
+There are some further modules that are needed internally but are not part of
+the API.
+
+
+
+Let the document to be parsed be stored in a file called
+doc.xml . The parsing process is started by calling the
+function
+
+
+val parse_document_entity : config -> source -> 'ext spec -> 'ext document
+
+
+defined in the module Pxp_yacc . The first argument
+specifies some global properties of the parser; it is recommended to start with
+the default_config . The second argument determines where the
+document to be parsed comes from; this may be a file, a channel, or an entity
+ID. To parse doc.xml , it is sufficient to pass
+from_file "doc.xml" .
+
+
+
+The third argument passes the object specification to use. Roughly
+speaking, it determines which classes implement the node objects of which
+element types, and which extensions are to be used. The 'ext
+polymorphic variable is the type of the extension. For the moment, let us
+simply pass default_spec as this argument, and ignore it.
+
+
+
+So the following expression parses doc.xml :
+
+
+open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec
+
+
+Note that default_config implies that warnings are collected
+but not printed. Errors raise one of the exceptions defined in
+Pxp_types ; to get readable errors and warnings catch the
+exceptions as follows:
+
+
+
+ print_endline (Pxp_types.string_of_exn e)
+]]>
+
+Now d is an object of the document
+class. If you want the node tree, you can get the root element by
+
+
+let root = d # root
+
+
+and if you would rather like to access the DTD, determine it by
+
+
+let dtd = d # dtd
+
+
+As it is more interesting, let us investigate the node tree now. Given the root
+element, it is possible to recursively traverse the whole tree. The children of
+a node n are returned by the method
+sub_nodes , and the type of a node is returned by
+node_type . This function traverses the tree, and prints the
+type of each node:
+
+
+
+ print_endline ("Element of type " ^ name);
+ let children = n # sub_nodes in
+ List.iter print_structure children
+ | T_data ->
+ print_endline "Data"
+ | _ ->
+ (* Other node types are not possible unless the parser is configured
+ differently.
+ *)
+ assert false
+]]>
+
+You can call this function by
+
+
+print_structure root
+
+
+The type returned by node_type is either T_element
+name or T_data . The name of the
+element type is the string included in the angle brackets. Note that only
+elements have children; data nodes are always leaves of the tree.
+
+
+
+There are some more methods in order to access a parsed node tree:
+
+
+
+
+n # parent : Returns the parent node, or raises
+Not_found if the node is already the root
+
+
+
+
+n # root : Returns the root of the node tree.
+
+
+
+
+n # attribute a : Returns the value of the attribute with
+name a . The method returns a value for every
+declared attribute, independently of whether the attribute
+instance is defined or not. If the attribute is not declared,
+Not_found will be raised. (In well-formedness mode, every
+attribute is considered as being implicitly declared with type
+CDATA .)
+
+
+
+The following return values are possible: Value s ,
+Valuelist sl , and Implied_value .
+The first two value types indicate that the attribute value is available,
+either because there is a definition
+a ="value "
+in the XML text, or because there is a default value (declared in the
+DTD). Only if both the instance definition and the default declaration are
+missing, the latter value Implied_value will be returned.
+
+
+
+In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
+IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
+Value s back, where s is the normalized
+string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
+represent list values, and the parser splits the XML literal into several
+tokens and returns these tokens as Valuelist sl .
+
+
+
+Normalization means that entity references (the
+&name ; tokens) and
+character references
+(&#number ; ) are replaced
+by the text they represent, and that white space characters are converted into
+plain spaces.
+
+
+
+
+n # data : Returns the character data contained in the
+node. For data nodes, the meaning is obvious as this is the main content of
+data nodes. For element nodes, this method returns the concatenated contents of
+all inner data nodes.
+
+
+Note that entity references included in the text are resolved while they are
+being parsed; for example the text "a &lt;&gt; b" will be returned
+as "a <> b" by this method. Spaces of data nodes are always
+preserved. Newlines are preserved, but always converted to \n characters even
+if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
+data nodes because the parser collapses all data material at one location into
+one node. (However, if you create your own tree or transform the parsed tree,
+it is possible to have adjacent data nodes.)
+
+
+Note that elements that do not allow #PCDATA as content
+will not have data nodes as children. This means that spaces and newlines, the
+only character material allowed for such elements, are silently dropped.
+
+
+
+
+For example, if the task is to print all contents of elements with type
+"valuable" whose attribute "priority" is "1", this function can help:
+
+
+
+ print_endline "Valuable node with priotity 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ | _ ->
+ assert false
+]]>
+
+You can call this function by:
+
+
+print_valuable_prio1 root
+
+
+If you like a DSSSL-like style, you can make the function
+process_children explicit:
+
+
+
+ print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ process_children n
+ | _ ->
+ assert false
+]]>
+
+So far, O'Caml is now a simple "style-sheet language": You can form a big
+"match" expression to distinguish between all significant cases, and provide
+different reactions on different conditions. But this technique has
+limitations; the "match" expression tends to get larger and larger, and it is
+difficult to store intermediate values as there is only one big
+recursion. Alternatively, it is also possible to represent the various cases as
+classes, and to use dynamic method lookup to find the appropriate class. The
+next section explains this technique in detail.
+
+
+
+
+
+
+
+
+
+ Class-based processing of the node tree
+
+By default, the parsed node tree consists of objects of the same class; this is
+a good design as long as you want only to access selected parts of the
+document. For complex transformations, it may be better to use different
+classes for objects describing different element types.
+
+
+
+For example, if the DTD declares the element types a ,
+b , and c , and if the task is to convert
+an arbitrary document into a printable format, the idea is to define for every
+element type a separate class that has a method print . The
+classes are eltype_a , eltype_b , and
+eltype_c , and every class implements
+print such that elements of the type corresponding to the
+class are converted to the output format.
+
+
+
+The parser supports such a design directly. As it is impossible to derive
+recursive classes in O'Caml (the problem is that the subclass is
+usually not a subtype in this case because O'Caml has a contravariant subtyping
+rule), the specialized element classes cannot be formed by
+simply inheriting from the built-in classes of the parser and adding methods
+for customized functionality. To get around this limitation, every node of the
+document tree is represented by two objects, one called
+"the node" and containing the recursive definition of the tree, one called "the
+extension". Every node object has a reference to the extension, and the
+extension has a reference to the node. The advantage of this model is that it
+is now possible to customize the extension without affecting the typing
+constraints of the recursive node definition.
+
+
+
+Every extension must have the three methods clone ,
+node , and set_node . The method
+clone creates a deep copy of the extension object and
+returns it; node returns the node object for this extension
+object; and set_node is used to tell the extension object
+which node is associated with it; this method is automatically called when the
+node tree is initialized. The following definition is a good starting point
+for these methods; usually clone must be further refined
+when instance variables are added to the class:
+
+
+}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ end
+]]>
+
+
+This part of the extension is usually the same for all classes, so it is a good
+idea to consider custom_extension as the super-class of the
+further class definitions. Continuing the example above, we can define the
+element type classes as follows:
+
+
+ unit
+ end
+
+class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+]]>
+
+The method print can now be implemented for every element
+type separately. Note that you get the associated node by invoking
+
+
+self # node
+
+
+and you get the extension object of a node n by writing
+
+
+n # extension
+
+
+It is guaranteed that
+
+
+self # node # extension == self
+
+
+always holds.
+
+
+ Here are sample definitions of the print
+methods:
+
+... are only containers: *)
+ output_string ch "(";
+ List.iter
+ (fun n -> n # extension # print ch)
+ (self # node # sub_nodes);
+ output_string ch ")";
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the value of the CDATA attribute "print": *)
+ match self # node # attribute "print" with
+ Value s -> output_string ch s
+ | Implied_value -> output_string ch ""
+ | Valuelist l -> assert false
+ (* not possible because the att is CDATA *)
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the contents of this element: *)
+ output_string ch (self # node # data)
+ end
+
+class null_extension =
+ object (self)
+ inherit custom_extension
+ method print ch = assert false
+ end
+]]>
+
+
+
+
+The remaining task is to configure the parser such that these extension classes
+are actually used. Here another problem arises: It is not possible to
+dynamically select the class of an object to be created. As a workaround,
+&markup; allows the user to specify exemplar objects for
+the various element types; instead of creating the nodes of the tree by
+applying the new operator the nodes are produced by
+duplicating the exemplars. As object duplication preserves the class of the
+object, one can create fresh objects of every class for which previously an
+exemplar has been registered.
+
+
+
+Exemplars are meant as objects without contents, the only interesting thing is
+that exemplars are instances of a certain class. The creation of an exemplar
+for an element node can be done by:
+
+
+let element_exemplar = new element_impl extension_exemplar
+
+
+And a data node exemplar is created by:
+
+
+let data_exemplar = new data_impl extension_exemplar
+
+
+The classes element_impl and data_impl
+are defined in the module Pxp_document . The constructors
+initialize the fresh objects as empty objects, i.e. without children, without
+data contents, and so on. The extension_exemplar is the
+initial extension object the exemplars are associated with.
+
+
+
+Once the exemplars are created and stored somewhere (e.g. in a hash table), you
+can take an exemplar and create a concrete instance (with contents) by
+duplicating it. As user of the parser you are normally not concerned with this
+as this is part of the internal logic of the parser, but as background knowledge
+it is worthwhile to mention that the two methods
+create_element and create_data actually
+perform the duplication of the exemplar for which they are invoked,
+additionally apply modifications to the clone, and finally return the new
+object. Moreover, the extension object is copied, too, and the new node object
+is associated with the fresh extension object. Note that this is the reason why
+every extension object must have a clone method.
+
+
+
+The configuration of the set of exemplars is passed to the
+parse_document_entity function as third argument. In our
+example, this argument can be set up as follows:
+
+
+
+
+The ~element_alist function argument defines the mapping
+from element types to exemplars as associative list. The argument
+~data_exemplar specifies the exemplar for data nodes, and
+the ~default_element_exemplar is used whenever the parser
+finds an element type for which the associative list does not define an
+exemplar.
+
+
+
+The configuration is now complete. You can still use the same parsing
+functions, only the initialization is a bit different. For example, call the
+parser by:
+
+
+let d = parse_document_entity default_config (from_file "doc.xml") spec
+
+
+Note that the resulting document d has a usable type;
+especially the print method we added is visible. So you can
+print your document by
+
+
+d # root # extension # print stdout
+
+
+
+
+This object-oriented approach looks rather complicated; this is mostly caused
+by working around some problems of the strict typing system of O'Caml. Some
+auxiliary concepts such as extensions were needed, but the practical
+consequences are low. In the next section, one of the examples of the
+distribution is explained, a converter from readme
+documents to HTML.
+
+
+
+
+
+
+
+
+
+ Example: An HTML backend for the readme
+DTD
+
+ The converter from readme documents to HTML
+documents follows strictly the approach to define one class per element
+type. The HTML code is similar to the readme source,
+because of this most elements can be converted in the following way: Given the
+input element
+
+
+content]]>
+
+
+the conversion text is the concatenation of a computed prefix, the recursively
+converted content, and a computed suffix.
+
+
+
+Only one element type cannot be handled by this scheme:
+footnote . Footnotes are collected while they are found in
+the input text, and they are printed after the main text has been converted and
+printed.
+
+
+
+ Header
+
+&readme.code.header;
+
+
+
+
+ Type declarations
+
+&readme.code.footnote-printer;
+
+
+
+
+ Class store
+
+The store is a container for footnotes. You can add a
+footnote by invoking alloc_footnote ; the argument is an
+object of the class footnote_printer , the method returns the
+number of the footnote. The interesting property of a footnote is that it can
+be converted to HTML, so a footnote_printer is an object
+with a method footnote_to_html . The class
+footnote which is defined below has a compatible method
+footnote_to_html such that objects created from it can be
+used as footnote_printer s.
+
+
+The other method, print_footnotes prints the footnotes as
+definition list, and is typically invoked after the main material of the page
+has already been printed. Every item of the list is printed by
+footnote_to_html .
+
+
+
+&readme.code.store;
+
+
+
+
+ Function escape_html
+
+This function converts the characters <, >, &, and " to their HTML
+representation. For example,
+escape_html "<>" = "&lt;&gt;" . Other
+characters are left unchanged.
+
+&readme.code.escape-html;
+
+
+
+
+ Virtual class shared
+
+This virtual class is the abstract superclass of the extension classes shown
+below. It defines the standard methods clone ,
+node , and set_node , and declares the type
+of the virtual method to_html . This method recursively
+traverses the whole element tree, and prints the converted HTML code to the
+output channel passed as second argument. The first argument is the reference
+to the global store object which collects the footnotes.
+
+&readme.code.shared;
+
+
+
+
+ Class only_data
+
+This class defines to_html such that the character data of
+the current node is converted to HTML. Note that self is an
+extension object, self # node is the node object, and
+self # node # data returns the character data of the node.
+
+&readme.code.only-data;
+
+
+
+
+ Class readme
+
+This class converts elements of type readme to HTML. Such an
+element is (by definition) always the root element of the document. First, the
+HTML header is printed; the title attribute of the element
+determines the title of the HTML page. Some aspects of the HTML page can be
+configured by setting certain parameter entities, for example the background
+color, the text color, and link colors. After the header, the
+body tag, and the headline have been printed, the contents
+of the page are converted by invoking to_html on all
+children of the current node (which is the root node). Then, the footnotes are
+appended to this by telling the global store object to print
+the footnotes. Finally, the end tags of the HTML pages are printed.
+
+
+
+This class is an example of how to access the value of an attribute: The value is
+determined by invoking self # node # attribute "title" . As
+this attribute has been declared as CDATA and as being required, the value has
+always the form Value s where s is the
+string value of the attribute.
+
+
+
+You can also see how entity contents can be accessed. A parameter entity object
+can be looked up by self # node # dtd # par_entity "name" ,
+and by invoking replacement_text the value of the entity
+is returned after inner parameter and character entities have been
+processed. Note that you must use gen_entity instead of
+par_entity to access general entities.
+
+
+
+&readme.code.readme;
+
+
+
+
+ Classes section , sect1 ,
+sect2 , and sect3
+
+As the conversion process is very similar, the conversion classes of the three
+section levels are derived from the more general section
+class. The HTML code of the section levels only differs in the type of the
+headline, and because of this the classes describing the section levels can be
+computed by replacing the class argument the_tag of
+section by the HTML name of the headline tag.
+
+
+
+Section elements are converted to HTML by printing a headline and then
+converting the contents of the element recursively. More precisely, the first
+sub-element is always a title element, and the other
+elements are the contents of the section. This structure is declared in the
+DTD, and it is guaranteed that the document matches the DTD. Because of this
+the title node can be separated from the rest without any checks.
+
+
+
+Both the title node, and the body nodes are then converted to HTML by calling
+to_html on them.
+
+
+
+&readme.code.section;
+
+
+
+
+ Classes map_tag , p ,
+em , ul , li
+
+Several element types are converted to HTML by simply mapping them to
+corresponding HTML element types. The class map_tag
+implements this, and the class argument the_target_tag
+determines the tag name to map to. The output consists of the start tag, the
+recursively converted inner elements, and the end tag.
+
+&readme.code.map-tag;
+
+
+
+
+ Class br
+
+Elements of type br are mapped to the same HTML type. Note
+that HTML forbids the end tag of br .
+
+&readme.code.br;
+
+
+
+
+ Class code
+
+The code type is converted to a pre
+section (preformatted text). As the meaning of tabs is unspecified in HTML,
+tabs are expanded to spaces.
+
+&readme.code.code;
+
+
+
+
+ Class a
+
+Hyperlinks, expressed by the a element type, are converted
+to the HTML a type. If the target of the hyperlink is given
+by href , the URL of this attribute can be used
+directly. Alternatively, the target can be given by
+readmeref in which case the ".html" suffix must be added to
+the file name.
+
+
+
+Note that within a only #PCDATA is allowed, so the contents
+can be converted directly by applying escape_html to the
+character data contents.
+
+&readme.code.a;
+
+
+
+
+ Class footnote
+
+The footnote class has two methods:
+to_html to convert the footnote reference to HTML, and
+footnote_to_html to convert the footnote text itself.
+
+
+
+The footnote reference is converted to a local hyperlink; more precisely, to
+two anchor tags which are connected with each other. The text anchor points to
+the footnote anchor, and the footnote anchor points to the text anchor.
+
+
+
+The footnote must be allocated in the store object. By
+allocating the footnote, you get the number of the footnote, and the text of
+the footnote is stored until the end of the HTML page is reached when the
+footnotes can be printed. The to_html method stores simply
+the object itself, such that the footnote_to_html method is
+invoked on the same object that encountered the footnote.
+
+
+
+The to_html only allocates the footnote, and prints the
+reference anchor, but it does not print nor convert the contents of the
+note. This is deferred until the footnotes actually get printed, i.e. the
+recursive call of to_html on the sub nodes is done by
+footnote_to_html .
+
+
+
+Note that this technique does not work if you make another footnote within a
+footnote; the second footnote gets allocated but not printed.
+
+
+
+&readme.code.footnote;
+
+
+
+
+ The specification of the document model
+
+This code sets up the hash table that connects element types with the exemplars
+of the extension classes that convert the elements to HTML.
+
+&readme.code.tag-map;
+
+
+
+
+
+
+
+
+
+
+
+ The objects representing the document
+
+
+This description might be out-of-date. See the module interface files
+for updated information.
+
+
+ The document class
+
+
+
+ object
+ method init_xml_version : string -> unit
+ method init_root : 'ext node -> unit
+
+ method xml_version : string
+ method xml_standalone : bool
+ method dtd : dtd
+ method root : 'ext node
+
+ method encoding : Pxp_types.rep_encoding
+
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ end
+;;
+]]>
+
+
+The methods beginning with init_ are only for internal use
+of the parser.
+
+
+
+
+
+xml_version : returns the version string at the beginning of
+the document. For example, "1.0" is returned if the document begins with
+<?xml version="1.0"?> .
+
+
+
+xml_standalone : returns the boolean value of
+the standalone declaration in the XML declaration. If the
+standalone attribute is missing, false is
+returned.
+
+
+
+dtd : returns a reference to the global DTD object.
+
+
+
+root : returns a reference to the root element.
+
+
+
+encoding : returns the internal encoding of the
+document. This means that all strings of which the document consists are
+encoded in this character set.
+
+
+
+
+pinstr : returns the processing instructions outside the DTD
+and outside the root element. The argument passed to the method names a
+target , and the method returns all instructions with this
+target. The target is the first word inside <? and
+?> .
+
+
+
+pinstr_names : returns the names of the processing instructions
+
+
+
+add_pinstr : adds another processing instruction. This method
+is used by the parser itself to enter the instructions returned by
+pinstr , but you can also enter additional instructions.
+
+
+
+
+write : writes the document to the passed stream as XML
+text using the passed (external) encoding. The generated text is always valid
+XML and can be parsed by PXP; however, the text is badly formatted (this is not
+a pretty printer).
+
+
+
+
+
+
+
+ The class type node
+
+
+From Pxp_document :
+
+
+type node_type =
+ T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+and some other, reserved types
+;;
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+
+ (* *)
+
+ method extension : 'ext
+ method dtd : dtd
+ method parent : 'ext node
+ method root : 'ext node
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node &fun; unit) &fun; unit
+ method iter_nodes_sibl :
+ ('ext node option &fun; 'ext node &fun; 'ext node option &fun; unit) &fun; unit
+ method node_type : node_type
+ method encoding : Pxp_types.rep_encoding
+ method data : string
+ method position : (string * int * int)
+ method comment : string option
+ method pinstr : string &fun; proc_instruction list
+ method pinstr_names : string list
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ (* *)
+
+ method attribute : string &fun; Pxp_types.att_value
+ method required_string_attribute : string &fun; string
+ method optional_string_attribute : string &fun; string option
+ method required_list_attribute : string &fun; string list
+ method optional_list_attribute : string &fun; string list
+ method attribute_names : string list
+ method attribute_type : string &fun; Pxp_types.att_type
+ method attributes : (string * Pxp_types.att_value) list
+ method id_attribute_name : string
+ method id_attribute_value : string
+ method idref_attribute_names : string list
+
+ (* *)
+
+ method add_node : ?force:bool &fun; 'ext node &fun; unit
+ method add_pinstr : proc_instruction &fun; unit
+ method delete : unit
+ method set_nodes : 'ext node list &fun; unit
+ method quick_set_attributes : (string * Pxp_types.att_value) list &fun; unit
+ method set_comment : string option &fun; unit
+
+ (* *)
+
+ method orphaned_clone : 'self
+ method orphaned_flat_clone : 'self
+ method create_element :
+ ?position:(string * int * int) &fun;
+ dtd &fun; node_type &fun; (string * string) list &fun;
+ 'ext node
+ method create_data : dtd &fun; string &fun; 'ext node
+ method keep_always_whitespace_mode : unit
+
+ (* *)
+
+ method local_validate : ?use_dfa:bool -> unit -> unit
+
+ (* ... Internal methods are undocumented. *)
+
+ end
+;;
+
+
+In the module Pxp_types you can find another type
+definition that is important in this context:
+
+
+type Pxp_types.att_value =
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+;;
+
+
+
+
+ The structure of document trees
+
+
+A node represents either an element or a character data section. There are two
+classes implementing the two aspects of nodes: element_impl
+and data_impl . The latter class does not implement all
+methods because some methods do not make sense for data nodes.
+
+
+
+(Note: PXP also supports a mode which forces that processing instructions and
+comments are represented as nodes of the document tree. However, these nodes
+are instances of element_impl with node types
+T_pinstr and T_comment ,
+respectively. This mode must be explicitly configured; the basic representation
+knows only element and data nodes.)
+
+
+ The following figure
+( ) shows an example how
+a tree is constructed from element and data nodes. The circular areas
+represent element nodes whereas the ovals denote data nodes. Only elements
+may have subnodes; data nodes are always leaves of the tree. The subnodes
+of an element can be either element or data nodes; in both cases the O'Caml
+objects storing the nodes have the class type node .
+
+ Attributes (the clouds in the picture) are not directly
+integrated into the tree; there is always an extra link to the attribute
+list. This is also true for processing instructions (not shown in the
+picture). This means that there are separated access methods for attributes and
+processing instructions.
+
+
+A tree with element nodes, data nodes, and attributes
+
+
+
+ Only elements, data sections, attributes and processing
+instructions (and comments, if configured) can, directly or indirectly, occur
+in the document tree. It is impossible to add entity references to the tree; if
+the parser finds such a reference, not the reference as such but the referenced
+text (i.e. the tree representing the structured text) is included in the
+tree.
+
+ Note that the parser collapses as much data material into one
+data node as possible such that there are normally never two adjacent data
+nodes. This invariant is enforced even if data material is included by entity
+references or CDATA sections, or if a data sequence is interrupted by
+comments. So a & b <!-- comment --> c <![CDATA[
+<> d]]> is represented by only one data node, for
+instance. However, you can create document trees manually which break this
+invariant; it is only the way the parser forms the tree.
+
+
+
+Nodes are doubly linked trees
+
+
+
+
+The node tree has links in both directions: Every node has a link to its parent
+(if any), and it has links to the subnodes (see
+figure ). Obviously,
+this doubly-linked structure simplifies the navigation in the tree; but has
+also some consequences for the possible operations on trees.
+
+
+Because every node must have at most one parent node,
+operations are illegal if they violate this condition. The following figure
+( ) shows on the left side
+that node y is added to x as new subnode
+which is allowed because y does not have a parent yet. The
+right side of the picture illustrates what would happen if y
+had a parent node; this is illegal because y would have two
+parents after the operation.
+
+
+A node can only be added if it is a root
+
+
+
+
+
+The "delete" operation simply removes the links between two nodes. In the
+picture ( ) the node
+x is deleted from the list of subnodes of
+y . After that, x becomes the root of the
+subtree starting at this node.
+
+
+A deleted node becomes the root of the subtree
+
+
+
+
+It is also possible to make a clone of a subtree; illustrated in
+ . In this case, the
+clone is a copy of the original subtree except that it is no longer a
+subnode. Because cloning never keeps the connection to the parent, the clones
+are called orphaned .
+
+
+
+The clone of a subtree
+
+
+
+
+
+ The methods of the class type node
+
+
+
+
+ General observers
+
+
+
+
+
+
+extension : The reference to the extension object which
+belongs to this node (see ...).
+
+
+
+dtd : Returns a reference to the global DTD. All nodes
+of a tree must share the same DTD.
+
+
+
+
+parent : Get the father node. Raises
+Not_found in the case the node does not have a
+parent, i.e. the node is the root.
+
+
+
+root : Gets the reference to the root node of the tree.
+Every node is contained in a tree with a root, so this method always
+succeeds. Note that this method searches the root,
+which costs time proportional to the length of the path to the root.
+
+
+
+
+sub_nodes : Returns references to the children. The returned
+list reflects the order of the children. For data nodes, this method returns
+the empty list.
+
+
+
+
+iter_nodes f : Iterates over the children, and calls
+f for every child in turn.
+
+
+
+
+iter_nodes_sibl f : Iterates over the children, and calls
+f for every child in turn. f gets as
+arguments the previous node, the current node, and the next node.
+
+
+
+node_type : Returns either T_data which
+means that the node is a data node, or T_element n
+which means that the node is an element of type n .
+If configured, possible node types are also T_pinstr t
+indicating that the node represents a processing instruction with target
+t , and T_comment in which case the node
+is a comment.
+
+
+
+
+encoding : Returns the encoding of the strings.
+
+
+
+data : Returns the character data of this node and all
+children, concatenated as one string. The encoding of the string is what
+the method encoding returns.
+- For data nodes, this method simply returns the represented characters.
+For elements, the meaning of the method has been extended such that it
+returns something useful, i.e. the effectively contained characters, without
+markup. (For T_pinstr and T_comment
+nodes, the method returns the empty string.)
+
+
+
+
+position : If configured, this method returns the position of
+the element as triple (entity, line, byteposition). For data nodes, the
+position is not stored. If the position is not available the triple
+"?", 0, 0 is returned.
+
+
+
+
+comment : Returns Some text for comment
+nodes, and None for other nodes. The text
+is everything between the comment delimiters <!-- and
+--> .
+
+
+
+
+pinstr n : Returns all processing instructions that are
+directly contained in this element and that have a target
+specification of n . The target is the first word after
+the <? .
+
+
+
+
+pinstr_names : Returns the list of all targets of processing
+instructions directly contained in this element.
+
+
+
+write s enc : Prints the node and all subnodes to the passed
+output stream as valid XML text, using the passed external encoding.
+
+
+
+
+
+
+
+
+
+ Attribute observers
+
+
+
+
+
+attribute n : Returns the value of the attribute with name
+n . This method returns a value for every declared
+attribute, and it raises Not_found for any undeclared
+attribute. Note that it even returns a value if the attribute is actually
+missing but is declared as #IMPLIED or has a default
+value. - Possible values are:
+
+
+
+Implied_value : The attribute has been declared with the
+keyword #IMPLIED , and the attribute is missing in the
+attribute list of this element.
+
+
+
+Value s : The attribute has been declared as type
+CDATA , as ID , as
+IDREF , as ENTITY , or as
+NMTOKEN , or as enumeration or notation, and one of the two
+conditions holds: (1) The attribute value is present in the attribute list in
+which case the value is returned in the string s . (2) The
+attribute has been omitted, and the DTD declared the attribute with a default
+value. The default value is returned in s .
+- Summarized, Value s is returned for non-implied, non-list
+attribute values.
+
+
+
+
+Valuelist l : The attribute has been declared as type
+IDREFS , as ENTITIES , or
+as NMTOKENS , and one of the two conditions holds: (1) The
+attribute value is present in the attribute list in which case the
+space-separated tokens of the value are returned in the string list
+l . (2) The attribute has been omitted, and the DTD declared
+the attribute with a default value. The default value is returned in
+l .
+- Summarized, Valuelist l is returned for all list-type
+attribute values.
+
+
+
+
+Note that before the attribute value is returned, the value is normalized. This
+means that newlines are converted to spaces, and that references to character
+entities (i.e. &#n ; ) and
+general entities
+(i.e. &name ; ) are expanded;
+if necessary, expansion is performed recursively.
+
+
+
+In well-formedness mode, there is no DTD which could declare an
+attribute. Because of this, every occurring attribute is considered as a CDATA
+attribute.
+
+
+
+
+required_string_attribute n : returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is convenient
+if you expect a non-implied and non-list attribute value.
+
+
+
+
+optional_string_attribute n : returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method returns None. - This method is
+convenient if you expect a non-list attribute value including the implied
+value.
+
+
+
+
+required_list_attribute n : returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is
+convenient if you expect a list attribute value.
+
+
+
+
+optional_list_attribute n : returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, an empty list will be returned. - This method
+is convenient if you expect a list attribute value or the implied value.
+
+
+
+
+attribute_names : returns the list of all attribute names of
+this element. As this is a validating parser, this list is equal to the
+list of declared attributes.
+
+
+
+
+attribute_type n : returns the type of the attribute called
+n . See the module Pxp_types for a
+description of the encoding of the types.
+
+
+
+
+attributes : returns the list of pairs of names and values
+for all attributes of
+this element.
+
+
+
+id_attribute_name : returns the name of the attribute that is
+declared with type ID. There is at most one such attribute. The method raises
+Not_found if there is no declared ID attribute for the
+element type.
+
+
+
+id_attribute_value : returns the value of the attribute that
+is declared with type ID. There is at most one such attribute. The method raises
+Not_found if there is no declared ID attribute for the
+element type.
+
+
+
+idref_attribute_names : returns the list of attribute names
+that are declared as IDREF or IDREFS.
+
+
+
+
+
+
+
+
+ Modifying methods
+
+
+
+The following methods are only defined for element nodes (more exactly:
+the methods are defined for data nodes, too, but fail always).
+
+
+
+
+add_node sn : Adds sub node sn to the list
+of children. This operation is illustrated in the picture
+ . This method expects that
+sn is a root, and it requires that sn and
+the current object share the same DTD.
+
+
+Because add_node is the method the parser itself uses
+to add new nodes to the tree, it performs by default some simple validation
+checks: If the content model is a regular expression, it is not allowed to add
+data nodes to this node unless the new nodes consist only of whitespace. In
+this case, the new data nodes are silently dropped (you can change this by
+invoking keep_always_whitespace_mode ).
+
+
+If the document is flagged as stand-alone, these data nodes only
+containing whitespace are even forbidden if the element declaration is
+contained in an external entity. This case is detected and rejected.
+
+If the content model is EMPTY , it is not allowed to
+add any data node unless the data node is empty. In this case, the new data
+node is silently dropped.
+
+
+These checks only apply if there is a DTD. In well-formedness mode, it is
+assumed that every element is declared with content model
+ANY which prohibits any validation check. Furthermore, you
+can turn these checks off by passing ~force:true as first
+argument.
+
+
+
+add_pinstr pi : Adds the processing instruction
+pi to the list of processing instructions.
+
+
+
+
+
+delete : Deletes this node from the tree. After this
+operation, this node is no longer the child of the former father node; and the
+node loses the connection to the father as well. This operation is illustrated
+by the figure .
+
+
+
+
+set_nodes nl : Sets the list of children to
+nl . It is required that every member of nl
+is a root, and that all members and the current object share the same DTD.
+Unlike add_node , no validation checks are performed.
+
+
+
+
+quick_set_attributes atts : sets the attributes of this
+element to atts . It is not checked
+whether atts matches the DTD or not; it is up to the
+caller of this method to ensure this. (This method may be useful to transform
+the attribute values, i.e. apply a mapping to every attribute.)
+
+
+
+
+set_comment text : This method is only applicable to
+T_comment nodes; it sets the comment text contained by such
+nodes.
+
+
+
+
+
+
+
+
+ Cloning methods
+
+
+
+
+
+
+orphaned_clone : Returns a clone of the node and the complete
+tree below this node (deep clone). The clone does not have a parent (i.e. the
+reference to the parent node is not cloned). While
+copying the subtree, strings are skipped; it is likely that the original tree
+and the copy tree share strings. Extension objects are cloned by invoking
+the clone method on the original objects; how much of
+the extension objects is cloned depends on the implementation of this method.
+
+ This operation is illustrated by the figure
+ .
+
+
+
+
+orphaned_flat_clone : Returns a clone of the node,
+but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.
+
+
+
+
+
+create_element dtd nt al : Returns a flat copy of this node
+(which must be an element) with the following modifications: The DTD is set to
+dtd ; the node type is set to nt , and the
+new attribute list is set to al (given as list of
+(name,value) pairs). The copy does not have children nor a parent. It does not
+contain processing instructions. See
+ the example below.
+
+
+ Note that you can specify the position of the new node
+by the optional argument ~position .
+
+
+
+
+create_data dtd cdata : Returns a flat copy of this node
+(which must be a data node) with the following modifications: The DTD is set to
+dtd ; the node type is set to T_data ; the
+attribute list is empty (data nodes never have attributes); the list of
+children and PIs is empty, too (same reason). The new node does not have a
+parent. The value cdata is the new character content of the
+node. See
+ the example below.
+
+
+
+
+keep_always_whitespace_mode : Even data nodes which are
+normally dropped because they only contain ignorable whitespace, can be added to
+this node once this mode is turned on. (This mode is useful to produce
+canonical XML.)
+
+
+
+
+
+
+
+
+
+ Validating methods
+
+
+There is one method which locally validates the node, i.e. checks whether the
+subnodes match the content model of this node.
+
+
+
+
+local_validate : Checks that this node conforms to the
+DTD by comparing the type of the subnodes with the content model for this
+node. (Applications need not call this method unless they add new nodes
+themselves to the tree.)
+
+
+
+
+
+
+
+
+ The class element_impl
+
+This class is an implementation of node which
+realizes element nodes:
+
+
+ [ 'ext ] node
+]]>
+
+
+
+
+ Constructor
+
+You can create a new instance by
+
+
+new element_impl extension_object
+
+
+which creates a special form of empty element which already contains a
+reference to the extension_object , but is
+otherwise empty. This special form is called an
+exemplar . The purpose of exemplars is that they serve as
+patterns that can be duplicated and filled with data. The method
+
+create_element is designed to perform this action.
+
+
+
+
+
+ Example
+
+ First, create an exemplar by
+
+
+let exemplar_ext = ... in
+let exemplar = new element_impl exemplar_ext in
+
+
+The exemplar is not used in node trees, but only as
+a pattern when the element nodes are created:
+
+
+let element = exemplar # create_element dtd (T_element name) attlist
+
+
+The element is a copy of exemplar
+(even the extension exemplar_ext has been copied)
+which ensures that element and its extension are objects
+of the same class as the exemplars; note that you need not pass a
+class name or other meta information. The copy is initially connected
+with the dtd , it gets a node type, and the attribute list
+is filled. The element is now fully functional; it can
+be added to another element as child, and it can contain references to
+subnodes.
+
+
+
+
+
+
+ The class data_impl
+
+This class is an implementation of node which
+should be used for all character data nodes:
+
+
+ [ 'ext ] node
+]]>
+
+
+
+
+
+ Constructor
+
+You can create a new instance by
+
+
+new data_impl extension_object
+
+
+which creates an empty exemplar node which is connected to
+extension_object . The node does not contain a
+reference to any DTD, and because of this it cannot be added to node trees.
+
+
+
+ To get a fully working data node, apply the method
+create_data
+ to the exemplar (see example).
+
+
+
+
+ Example
+
+ First, create an exemplar by
+
+
+let exemplar_ext = ... in
+let exemplar = new data_impl exemplar_ext in
+
+
+The exemplar is not used in node trees, but only as
+a pattern when the data nodes are created:
+
+
+let data_node = exemplar # create_data dtd "The characters contained in the data node"
+
+
+The data_node is a copy of exemplar .
+The copy is initially connected
+with the dtd , and it is filled with character material.
+The data_node is now fully functional; it can
+be added to an element as child.
+
+
+
+
+
+ The type spec
+
+The type spec defines a way to handle the details of
+creating nodes from exemplars.
+
+
+val make_spec_from_mapping :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_mapping: (string, 'ext node) Hashtbl.t ->
+ unit ->
+ 'ext spec
+
+val make_spec_from_alist :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_alist : (string * 'ext node) list ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_alist: (string * 'ext node) list ->
+ unit ->
+ 'ext spec
+]]>
+
+The two functions make_spec_from_mapping and
+make_spec_from_alist create spec
+values. Both functions are functionally equivalent and the only difference is
+that the first function prefers hashtables and the latter associative lists to
+describe mappings from names to exemplars.
+
+
+
+You can specify exemplars for the various kinds of nodes that need to be
+generated when an XML document is parsed:
+
+
+
+ ~super_root_exemplar : This exemplar
+is used to create the super root. This special node is only created if the
+corresponding configuration option has been selected; it is the parent node of
+the root node which may be convenient if every working node must have a parent.
+
+
+ ~comment_exemplar : This exemplar is
+used when a comment node must be created. Note that such nodes are only created
+if the corresponding configuration option is "on".
+
+
+
+ ~default_pinstr_exemplar : If a node
+for a processing instruction must be created, and the instruction is not listed
+in the table passed by ~pinstr_mapping or
+~pinstr_alist , this exemplar is used.
+Again the configuration option must be "on" in order to create such nodes at
+all.
+
+
+
+ ~pinstr_mapping or
+~pinstr_alist : Map the target names of processing
+instructions to exemplars. These mappings are only used when nodes for
+processing instructions are created.
+
+
+ ~data_exemplar : The exemplar for
+ordinary data nodes.
+
+
+ ~default_element_exemplar : This
+exemplar is used if an element node must be created, but the element type
+cannot be found in the tables element_mapping or
+element_alist .
+
+
+ ~element_mapping or
+~element_alist : Map the element types to exemplars. These
+mappings are used to create element nodes.
+
+
+
+In most cases, you only want to create spec values to pass
+them to the parser functions found in Pxp_yacc . However, it
+might be useful to apply spec values directly.
+
+
+The following functions create various types of nodes by selecting the
+corresponding exemplar from the passed spec value, and by
+calling create_element or create_data on
+the exemplar.
+
+
+val create_data_node :
+ 'ext spec ->
+ dtd ->
+ (* data material: *) string ->
+ 'ext node
+
+val create_element_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* element type: *) string ->
+ (* attributes: *) (string * string) list ->
+ 'ext node
+
+val create_super_root_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ 'ext node
+
+val create_comment_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* comment text: *) string ->
+ 'ext node
+
+val create_pinstr_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ proc_instruction ->
+ 'ext node
+]]>
+
+
+
+
+ Examples
+
+
+ Building trees.
+
+ Here is the piece of code that creates the tree of
+the figure . The extension
+object and the DTD are beyond the scope of this example.
+
+
+let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar = new data_impl exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;
+
+
+Alternatively, the last block of statements could also be written as:
+
+
+a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];
+
+
+The root of the tree is a1 , i.e. it is true that
+
+
+x # root == a1
+
+
+for every x from { a1 , a2 ,
+b1 , c1 , cherries ,
+orange }.
+
+
+
+Furthermore, the following properties hold:
+
+
+ a1 # attribute "att" = Value "apple"
+& a2 # attribute "att" = Value "orange"
+
+& cherries # data = "Cherries"
+& orange # data = "An orange"
+& a1 # data = "An orangeCherries"
+
+& a1 # node_type = T_element "a"
+& a2 # node_type = T_element "a"
+& b1 # node_type = T_element "b"
+& c1 # node_type = T_element "c"
+& cherries # node_type = T_data
+& orange # node_type = T_data
+
+& a1 # sub_nodes = [ b1; c1 ]
+& a2 # sub_nodes = [ orange ]
+& b1 # sub_nodes = [ a2; cherries ]
+& c1 # sub_nodes = []
+& cherries # sub_nodes = []
+& orange # sub_nodes = []
+
+& a2 # parent == b1
+& b1 # parent == a1
+& c1 # parent == a1
+& cherries # parent == b1
+& orange # parent == a2
+
+
+
+ Searching nodes.
+
+ The following function searches all nodes of a tree
+for which a certain condition holds:
+
+
+let rec search p t =
+ if p t then
+ t :: search_list p (t # sub_nodes)
+ else
+ search_list p (t # sub_nodes)
+
+and search_list p l =
+ match l with
+ [] -> []
+ | t :: l' -> (search p t) @ (search_list p l')
+;;
+
+
+
+
+ For example, if you want to search all elements of a certain
+type et , the function search can be
+applied as follows:
+
+
+let search_element_type et t =
+ search (fun x -> x # node_type = T_element et) t
+;;
+
+
+
+
+ Getting attribute values.
+
+ Suppose we have the declaration:
+
+]]>
+
+
+In this case, every element e must have an attribute
+a , otherwise the parser would indicate an error. If
+the O'Caml variable n holds the node of the tree
+corresponding to the element, you can get the value of the attribute
+a by
+
+
+let value_of_a = n # required_string_attribute "a"
+
+
+which is more or less an abbreviation for
+
+ s
+ | _ -> assert false]]>
+
+
+- as the attribute is required, the attribute method always
+returns a Value .
+
+
+
+ In contrast to this, the attribute b can be
+omitted. In this case, the method required_string_attribute
+works only if the attribute is there, and the method will fail if the attribute
+is missing. To get the value, you can apply the method
+optional_string_attribute :
+
+
+let value_of_b = n # optional_string_attribute "b"
+
+
+Now, value_of_b is of type string option ,
+and None represents the omitted attribute. Alternatively,
+you could also use attribute :
+
+ Some s
+ | Implied_value -> None
+ | _ -> assert false]]>
+
+
+
+ The attribute c behaves much like
+a , because it has always a value. If the attribute is
+omitted, the default, here "12345", will be returned instead. Because of this,
+you can again use required_string_attribute to get the
+value.
+
+
+ The type CDATA is the most general string
+type. The types NMTOKEN , ID ,
+IDREF , ENTITY , and all enumerators and
+notations are special forms of string types that restrict the possible
+values. From O'Caml, they behave like CDATA , i.e. you can
+use the methods required_string_attribute and
+optional_string_attribute , too.
+
+
+ In contrast to this, the types NMTOKENS ,
+IDREFS , and ENTITIES mean lists of
+strings. Suppose we have the declaration:
+
+]]>
+
+
+The type NMTOKENS stands for lists of space-separated
+tokens; for example the value "1 abc 23ef" means the list
+["1"; "abc"; "23ef"] . (Again, IDREFS
+and ENTITIES have more restricted values.) To get the
+value of attribute d , one can use
+
+
+let value_of_d = n # required_list_attribute "d"
+
+
+or
+
+ l
+ | _ -> assert false]]>
+
+
+As d is required, the attribute cannot be omitted, and
+the attribute method returns always a
+Valuelist .
+
+
+ For optional attributes like e , apply
+
+
+let value_of_e = n # optional_list_attribute "e"
+
+
+or
+
+ l
+ | Implied_value -> []
+ | _ -> assert false]]>
+
+
+Here, the case that the attribute is missing counts like the empty list.
+
+
+
+
+
+
+ Iterators
+
+ There are also several iterators in Pxp_document; please see
+the mli file for details. You can find examples for them in the
+"simple_transformation" directory.
+
+
+val find : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node
+
+val find_all : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node list
+
+val find_element : ?deeply:bool ->
+ string -> 'ext node -> 'ext node
+
+val find_all_elements : ?deeply:bool ->
+ string -> 'ext node -> 'ext node list
+
+exception Skip
+val map_tree : pre:('exta node -> 'extb node) ->
+ ?post:('extb node -> 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+
+val map_tree_sibl :
+ pre: ('exta node option -> 'exta node -> 'exta node option ->
+ 'extb node) ->
+ ?post:('extb node option -> 'extb node -> 'extb node option ->
+ 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+val iter_tree : ?pre:('ext node -> unit) ->
+ ?post:('ext node -> unit) ->
+ 'ext node ->
+ unit
+
+val iter_tree_sibl :
+ ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ 'ext node ->
+ unit
+]]>
+
+
+
+
+
+
+
+
+ The class type extension
+
+
+
+ unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+]]>
+
+
+This is the type of classes used for node extensions. For every node of the
+document tree, there is not only the node object, but also
+an extension object. The latter has minimal
+functionality; it has only the necessary methods to be attached to the node
+object containing the details of the node instance. The extension object is
+called extension because its purpose is extensibility.
+
+ For some reasons, it is impossible to derive the
+node classes (i.e. element_impl and
+data_impl ) such that the subclasses can be extended by
+new methods. But
+subclassing nodes is a great feature, because it allows the user to provide
+different classes for different types of nodes. The extension objects are a
+workaround that is as powerful as direct subclassing; the cost is
+some notational overhead.
+
+
+
+The structure of nodes and extensions
+
+
+
+
+ The picture shows how the nodes and extensions are linked
+together. Every node has a reference to its extension, and every extension has
+a reference to its node. The methods extension and
+node follow these references; a typical phrase is
+
+
+self # node # attribute "xy"
+
+
+to get the value of an attribute from a method defined in the extension object;
+or
+
+
+self # node # iter
+ (fun n -> n # extension # my_method ...)
+
+
+to iterate over the subnodes and to call my_method of the
+corresponding extension objects.
+
+
+ Note that extension objects do not have references to subnodes
+(or "subextensions") themselves; in order to get one of the children of an
+extension you must first go to the node object, then get the child node, and
+finally reach the extension that is logically the child of the extension you
+started with.
+
+
+ How to define an extension class
+
+ At minimum, you must define the methods
+clone , node , and
+set_node such that your class is compatible with the type
+extension . The method set_node is called
+during the initialization of the node, or after a node has been cloned; the
+node object invokes set_node on the extension object to tell
+it that this node is now the object the extension is linked to. The extension
+must return the node object passed as argument of set_node
+when the node method is called.
+
+ The clone method must return a copy of the
+extension object; at least the object itself must be duplicated, but if
+required, the copy should deeply duplicate all objects and values that are
+referred by the extension, too. Whether this is required, depends on the
+application; clone is invoked by the node object when one of
+its cloning methods is called.
+
+ A good starting point for an extension class:
+
+
+}
+
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+
+ method set_node n =
+ node <- Some n
+
+ end
+]]>
+
+
+This class is compatible with extension . The purpose of
+defining such a class is, of course, adding further methods; and you can do it
+without restriction.
+
+
+ Often, you want more than one extension class. In this case,
+the simplest approach is for all your classes (for one kind of document) to have
+the same type (with respect to the interface; i.e. it does not matter if your
+classes differ in the defined private methods and instance variables, but
+public methods count). This approach avoids lots of coercions and problems with
+type incompatibilities. It is simple to implement:
+
+
+
+
+
+If a class does not need a method (e.g. because it does not make sense, or it
+would violate some important condition), it is possible to define the method
+and to always raise an exception when the method is invoked
+(e.g. assert false ).
+
+
+ The latter is a strong recommendation: do not try to further
+specialize the types of extension objects. It is difficult, sometimes even
+impossible, and almost never worth-while.
+
+
+
+ How to bind extension classes to element types
+
+ Once you have defined your extension classes, you can bind them
+to element types. The simplest case is that you have only one class and that
+this class is to be always used. The parsing functions in the module
+Pxp_yacc take a spec argument which
+can be customized. If your single class has the name c ,
+this argument should be
+
+
+let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist: []
+ ()
+
+
+This means that data nodes will be created from the exemplar passed by
+~data_exemplar and that all element nodes will be made from the exemplar
+specified by ~default_element_exemplar. In ~element_alist, you can
+specify that different exemplars are to be used for different element types; but
+this is an optional feature. If you do not need it, pass the empty list.
+
+
+
+Remember that an exemplar is a (node, extension) pair that serves as pattern
+when new nodes (and the corresponding extension objects) are added to the
+document tree. In this case, the exemplar contains c as
+extension, and when nodes are created, the exemplar is cloned, and cloning
+makes also a copy of c such that all nodes of the document
+tree will have a copy of c as extension.
+
+
+ The ~element_alist argument can bind
+specific element types to specific exemplars; as exemplars may be instances of
+different classes it is effectively possible to bind element types to
+classes. For example, if the element type "p" is implemented by class "c_p",
+and "q" is realized by "c_q", you can pass the following value:
+
+
+let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist:
+ [ "p", new element_impl c_p;
+ "q", new element_impl c_q;
+ ]
+ ()
+
+
+The extension object c is still used for all data nodes and
+for all other element types.
+
+
+
+
+
+
+
+
+
+ Details of the mapping from XML text to the tree representation
+
+
+
+ The representation of character-free elements
+
+ If an element declaration does not allow the element to
+contain character data, the following rules apply.
+
+ If the element must be empty, i.e. it is declared with the
+keyword EMPTY , the element instance must be effectively
+empty (it must not even contain whitespace characters). The parser guarantees
+that a declared EMPTY element never contains a data
+node, even if the data node represents the empty string.
+
+ If the element declaration only permits other elements to occur
+within that element but not character data, it is still possible to insert
+whitespace characters between the subelements. The parser ignores these
+characters, too, and does not create data nodes for them.
+
+
+ Example.
+
+ Consider the following element types:
+
+
+
+
+]]>
+
+Only x may contain character data, the keyword
+#PCDATA indicates this. The other types are character-free.
+
+
+
+ The XML term
+
+
+]]>
+
+will be internally represented by an element node for x
+with three subnodes: the first z element, a data node
+containing the space character, and the second z element.
+In contrast to this, the term
+
+
+]]>
+
+is represented by an element node for y with only
+two subnodes, the two z elements. There
+is no data node for the space character because spaces are ignored in the
+character-free element y .
+
+
+
+
+
+ The representation of character data
+
+ The XML specification allows all Unicode characters in XML
+texts. This parser can be configured such that UTF-8 is used to represent the
+characters internally; however, the default character encoding is
+ISO-8859-1. (Currently, no other encodings are possible for the internal string
+representation; the type Pxp_types.rep_encoding enumerates
+the possible encodings. In principle, the parser could use any encoding that is
+ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
+ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
+encodings (or other multibyte encodings which are not ASCII-compatible) unless
+major parts of the parser are rewritten - unlikely...)
+
+
+
+The internal encoding may be different from the external encoding (specified
+in the XML declaration <?xml ... encoding="..."?> ); in
+this case the strings are automatically converted to the internal encoding.
+
+
+
+If the internal encoding is ISO-8859-1, it is possible that there are
+characters that cannot be represented. In this case, the parser ignores such
+characters and prints a warning (to the collect_warning
+object that must be passed when the parser is called).
+
+
+ The XML specification allows lines to be separated by single LF
+characters, by CR LF character sequences, or by single CR
+characters. Internally, these separators are always converted to single LF
+characters.
+
+ The parser guarantees that there are never two adjacent data
+nodes; if necessary, data material that would otherwise be represented by
+several nodes is collapsed into one node. Note that you can still create node
+trees with adjacent data nodes; however, the parser does not return such trees.
+
+
+ Note that CDATA sections are not represented specially; such
+sections are added to the current data material that is being collected for the
+next data node.
+
+
+
+
+ The representation of entities within documents
+
+ Entities are not represented within
+documents! If the parser finds an entity reference in the document
+content, the reference is immediately expanded, and the parser reads the
+expansion text instead of the reference.
+
+
+
+
+ The representation of attributes As attribute
+values are composed of Unicode characters, too, the same problems with the
+character encoding arise as for character material. Attribute values are
+converted to the internal encoding, too; and if there are characters that
+cannot be represented, these are dropped, and a warning is printed.
+
+ Attribute values are normalized before they are returned by
+methods like attribute . First, any remaining entity
+references are expanded; if necessary, expansion is performed recursively.
+Second, newline characters (any of LF, CR LF, or CR characters) are converted
+to single space characters. Note that especially the latter action is
+prescribed by the XML standard (but the character reference &#10; is not converted
+such that it is still possible to include line feeds into attributes).
+
+
+
+
+ The representation of processing instructions
+Processing instructions are parsed to some extent: The first word of the
+PI is called the target, and it is stored separated from the rest of the PI:
+
+
+]]>
+
+The exact location where a PI occurs is not represented (by default). The
+parser puts the PI into the object that represents the embracing construct (an
+element, a DTD, or the whole document); that means you can find out which PIs
+occur in a certain element, in the DTD, or in the whole document, but you
+cannot lookup the exact position within the construct.
+
+
+ If you require the exact location of PIs, it is possible to
+create extra nodes for them. This mode is controlled by the option
+enable_pinstr_nodes . The additional nodes have the node type
+T_pinstr target , and are created
+from special exemplars contained in the spec (see
+pxp_document.mli).
+
+
+
+ The representation of comments
+
+Normally, comments are not represented; they are dropped by
+default. However, if you require them, it is possible to create
+T_comment nodes for them. This mode can be specified by the
+option enable_comment_nodes . Comment nodes are created from
+special exemplars contained in the spec (see
+pxp_document.mli). You can access the contents of comments through the
+method comment .
+
+
+
+ The attributes xml:lang and
+xml:space
+
+ These attributes are not supported specially; they are handled
+like any other attribute.
+
+
+
+
+ And what about namespaces?
+ Currently, there is no special support for namespaces.
+However, the parser allows the colon to occur in names, so that it is
+possible to implement namespaces on top of the current API.
+
+ Some future release of PXP will support namespaces as built-in
+feature...
+
+
+
+
+
+
+
+
+
+ Configuring and calling the parser
+
+
+
+
+
+
+ Overview
+
+There are the following main functions invoking the parser (in Pxp_yacc):
+
+
+
+ parse_document_entity: You want to
+parse a complete and closed document consisting of a DTD and the document body;
+the body is validated against the DTD. This mode is interesting if you have a
+file
+
+ ...
+]]>
+
+and you can accept any DTD that is included in the file (e.g. because the file
+is under your control).
+
+
+
+ parse_wfdocument_entity: You want to
+parse a complete and closed document consisting of a DTD and the document body;
+but the body is not validated, only checked for well-formedness. This mode is
+preferred if validation costs too much time or if the DTD is missing.
+
+
+
+ parse_dtd_entity: You want only to
+parse an entity (file) containing the external subset of a DTD. Sometimes it is
+interesting to read such a DTD, for example to compare it with the DTD included
+in a document, or to apply the next mode:
+
+
+
+ parse_content_entity: You want only to
+parse an entity (file) containing a fragment of a document body; this fragment
+is validated against the DTD you pass to the function. Especially, the fragment
+must not have a <!DOCTYPE> clause, and must directly
+begin with an element. The element is validated against the DTD. This mode is
+interesting if you want to check documents against a fixed, immutable DTD.
+
+
+
+ parse_wfcontent_entity: This function
+also parses a single element without DTD, but does not validate it.
+
+
+ extract_dtd_from_document_entity: This
+function extracts the DTD from a closed document consisting of a DTD and a
+document body. Both the internal and the external subsets are extracted.
+
+
+
+
+
+In many cases, parse_document_entity is the preferred mode
+to parse a document in a validating way, and
+parse_wfdocument_entity is the mode of choice to parse a
+file while only checking for well-formedness.
+
+
+
+There are a number of variations of these modes. One important application of a
+parser is to check documents of an untrusted source against a fixed DTD. One
+solution is to not allow the <!DOCTYPE> clause in
+these documents, and treat the document like a fragment (using mode
+parse_content_entity ). This is very simple, but
+inflexible; users of such a system cannot even define additional entities to
+abbreviate frequent phrases of their text.
+
+
+
+It may be necessary to have a more intelligent checker. For example, it is also
+possible to parse the document to check fully, i.e. with DTD, and to compare
+this DTD with the prescribed one. In order to fully parse the document, mode
+parse_document_entity is applied, and to get the DTD to
+compare with, mode parse_dtd_entity can be used.
+
+
+
+There is another very important configurable aspect of the parser: the
+so-called resolver. The task of the resolver is to locate the contents of an
+(external) entity for a given entity name, and to make the contents accessible
+as a character stream. (Furthermore, it also normalizes the character set;
+but this is a detail we can ignore here.) Consider you have a file called
+"main.xml" containing
+
+
+%sub;
+]]>
+
+and a file stored in the subdirectory "sub" with name
+"sub.xml" containing
+
+
+%subsub;
+]]>
+
+and a file stored in the subdirectory "subsub" of
+"sub" with name "subsub.xml" (the
+contents of this file do not matter). Here, the resolver must track that
+the second entity subsub is located in the directory
+"sub/subsub" , i.e. the difficulty is to interpret the
+system (file) names of entities relative to the entities containing them,
+even if the entities are deeply nested.
+
+
+
+There is not a fixed resolver already doing everything right - resolving entity
+names is a task that highly depends on the environment. The XML specification
+only demands that SYSTEM entities are interpreted like URLs
+(which is not very precise, as there are lots of URL schemes in use), hoping
+that this helps overcoming the local peculiarities of the environment; the idea
+is that if you do not know your environment you can refer to other entities by
+denoting URLs for them. I think that this interpretation of
+SYSTEM names may have some applications in the internet, but
+it is not the first choice in general. Because of this, the resolver is a
+separate module of the parser that can be exchanged by another one if
+necessary; more precisely, the parser already defines several resolvers.
+
+
+
+The following resolvers do already exist:
+
+
+
+ Resolvers reading from arbitrary input channels. These
+can be configured such that a certain ID is associated with the channel; in
+this case inner references to external entities can be resolved. There is also
+a special resolver that interprets SYSTEM IDs as URLs; this resolver can
+process relative SYSTEM names and determine the corresponding absolute URL.
+
+
+
+ A resolver that reads always from a given O'Caml
+string. This resolver is not able to resolve further names if the string is
+not associated with any name, i.e. if the document contained in the string
+refers to an external entity, this reference cannot be followed in this
+case.
+
+
+ A resolver for file names. The SYSTEM
+name is interpreted as file URL with the slash "/" as separator for
+directories. - This resolver is derived from the generic URL resolver.
+
+
+
+The interface a resolver must have is documented, so it is possible to write
+your own resolver. For example, you could connect the parser with an HTTP
+client, and resolve URLs of the HTTP namespace. The resolver classes allow
+several independent resolvers to be combined into one more powerful resolver;
+thus it is possible to combine a self-written resolver with the already
+existing resolvers.
+
+
+
+Note that the existing resolvers only interpret SYSTEM
+names, not PUBLIC names. If it helps you, it is possible to
+define resolvers for PUBLIC names, too; for example, such a
+resolver could look up the public name in a hash table, and map it to a system
+name which is passed over to the existing resolver for system names. It is
+relatively simple to provide such a resolver.
+
+
+
+
+
+
+ Resolvers and sources
+
+
+ Using the built-in resolvers (called sources)
+
+ The type source enumerates the two
+possibilities where the document to parse comes from.
+
+
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+
+
+You normally need not worry about this type, as there are convenience
+functions that create source values:
+
+
+
+
+ from_file s : The document is read from
+file s ; you may specify absolute or relative path names.
+The file name must be encoded as UTF-8 string.
+
+
+There is an optional argument ~system_encoding
+specifying the character encoding which is used for the names of the file
+system. For example, if this encoding is ISO-8859-1 and s is
+also an ISO-8859-1 string, you can form the source:
+
+
+let src = from_file ~system_encoding:`Enc_iso88591 s
+
+
+This source has the advantage that
+it is able to resolve inner external entities; i.e. if your document includes
+data from another file (using the SYSTEM attribute), this
+mode will find that file. However, this mode cannot resolve
+PUBLIC identifiers nor SYSTEM identifiers
+other than "file:".
+
+
+
+ from_channel ch : The document is read
+from the channel ch . In general, this source also supports
+file URLs found in the document; however, by default only absolute URLs are
+understood. It is possible to associate an ID with the channel such that the
+resolver knows how to interpret relative URLs:
+
+
+from_channel ~id:(System "file:///dir/dir1/") ch
+
+
+There is also the ~system_encoding argument specifying how file names are
+encoded. - The example from above can also be written (but it is no
+longer possible to interpret relative URLs because there is no ~id argument,
+and computing this argument is relatively complicated because it must
+be a valid URL):
+
+
+let ch = open_in s in
+let src = from_channel ~system_encoding:`Enc_iso88591 ch in
+...;
+close_in ch
+
+
+
+
+ from_string s : The string
+s is the document to parse. This mode is not able to
+interpret file names of SYSTEM clauses, nor can it look up
+PUBLIC identifiers.
+
+ Normally, the encoding of the string is detected as usual
+by analyzing the XML declaration, if any. However, it is also possible to
+specify the encoding directly:
+
+
+let src = from_string ~fixenc:`Enc_iso88592 s
+
+
+
+
+ ExtID (id, r) : The document to parse
+is denoted by the identifier id (either a
+SYSTEM or PUBLIC clause), and this
+identifier is interpreted by the resolver r . Use this mode
+if you have written your own resolver.
+ Which character sets are possible depends on the passed
+resolver r .
+
+
+ Entity (get_entity, r) : The document
+to parse is returned by the function invocation get_entity
+dtd , where dtd is the DTD object to use (it may be
+empty). Inner external references occurring in this entity are resolved using
+the resolver r .
+ Which character sets are possible depends on the passed
+resolver r .
+
+
+
+
+
+
+ The resolver API
+
+ A resolver is an object that can be opened like a file, but you
+do not pass the file name to the resolver, but the XML identifier of the entity
+to read from (either a SYSTEM or PUBLIC
+clause). When opened, the resolver must return the
+Lexing.lexbuf that reads the characters. The resolver can
+be closed, and it can be cloned. Furthermore, it is possible to tell the
+resolver which character set it should assume. The following class type is
+taken from Pxp_reader:
+
+class type resolver =
+ object
+ method init_rep_encoding : rep_encoding -> unit
+ method init_warner : collect_warnings -> unit
+ method rep_encoding : rep_encoding
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+ method clone : resolver
+ method close_all : unit
+ end
+
+The resolver object must work as follows:
+
+
+
+
+ When the parser is called, it tells the resolver the
+warner object and the internal encoding by invoking
+init_warner and init_rep_encoding . The
+resolver should store these values. The method rep_encoding
+should return the internal encoding.
+
+
+
+ If the parser wants to read from the resolver, it invokes
+the method open_in . Either the resolver succeeds, in which
+case the Lexing.lexbuf reading from the file or stream must
+be returned, or opening fails. In the latter case the method implementation
+should raise an exception (see below).
+
+
+ If the parser finishes reading, it calls the
+close_in method.
+
+
+ If the parser finds a reference to another external
+entity in the input stream, it calls clone to get a second
+resolver which must be initially closed (not yet connected with an input
+stream). The parser then invokes open_in and the other
+methods as described.
+
+
+ If you already know the character set of the input
+stream, you should recode it to the internal encoding, and define the method
+change_encoding as an empty method.
+
+
+ If you want to support multiple external character sets,
+the object must follow a much more complicated protocol. Directly after
+open_in has been called, the resolver must return a lexical
+buffer that only reads one byte at a time. This is only possible if you create
+the lexical buffer with Lexing.from_function ; the function
+must then always return 1 if the EOF is not yet reached, and 0 if EOF is
+reached. If the parser has read the first line of the document, it will invoke
+change_encoding to tell the resolver which character set to
+assume. From this moment, the object can return more than one byte at once. The
+argument of change_encoding is either the parameter of the
+"encoding" attribute of the XML declaration, or the empty string if there is
+not any XML declaration or if the declaration does not contain an encoding
+attribute.
+
+ At the beginning the resolver must only return one
+character every time something is read from the lexical buffer. The reason for
+this is that you otherwise would not exactly know at which position in the
+input stream the character set changes.
+
+ If you want automatic recognition of the character set,
+it is up to the resolver object to implement this.
+
+
+ If an error occurs, the parser calls the method
+close_all for the top-level resolver; this method should
+close itself (if not already done) and all clones.
+
+
+
+ Exceptions
+
+It is possible to chain resolvers such that when the first resolver is not able
+to open the entity, the other resolvers of the chain are tried in turn. The
+method open_in should raise the exception
+Not_competent to indicate that the next resolver should try
+to open the entity. If the resolver is able to handle the ID, but some other
+error occurs, the exception Not_resolvable should be raised
+to force the chain to break.
+
+
+
+ Example: How to define a resolver that is equivalent to
+from_string: ...
+
+
+
+
+ Predefined resolver components
+
+There are some classes in Pxp_reader that define common resolver behaviour.
+
+
+class resolve_read_this_channel :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ ?auto_close:bool ->
+ in_channel ->
+ resolver
+
+Reads from the passed channel (it may be even a pipe). If the
+~id argument is passed to the object, the created resolver
+accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has
+been cloned, it does not accept any ID. This means that this resolver cannot
+handle inner references to external entities. Note that you can combine this
+resolver with another resolver that can handle inner references (such as
+resolve_as_file); see class 'combine' below. - If you pass the
+~fixenc argument, the encoding of the channel is set to the
+passed value, regardless of any auto-recognition or any XML declaration. - If
+~auto_close = true (which is the default), the channel is
+closed after use. If ~auto_close = false , the channel is
+left open.
+
+
+
+
+class resolve_read_any_channel :
+ ?auto_close:bool ->
+ channel_of_id:(ext_id -> (in_channel * encoding option)) ->
+ resolver
+
+This resolver calls the function ~channel_of_id to open a
+new channel for the passed ext_id . This function must either
+return the channel and the encoding, or it must fail with Not_competent. The
+function must return None as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+Some e if it is already known that the encoding of the
+channel is e . If ~auto_close = true
+(which is the default), the channel is closed after use. If
+~auto_close = false , the channel is left open.
+
+
+
+
+class resolve_read_url_channel :
+ ?base_url:Neturl.url ->
+ ?auto_close:bool ->
+ url_of_id:(ext_id -> Neturl.url) ->
+ channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
+ resolver
+
+When this resolver gets an ID to read from, it calls the function
+~url_of_id to get the corresponding URL. This URL may be a
+relative URL; however, a URL scheme must be used which contains a path. The
+resolver converts the URL to an absolute URL if necessary. The second
+function, ~channel_of_url , is fed with the absolute URL as
+input. This function opens the resource to read from, and returns the channel
+and the encoding of the resource.
+
+
+Both functions, ~url_of_id and
+~channel_of_url , can raise Not_competent to indicate that
+the object is not able to read from the specified resource. However, there is a
+difference: A Not_competent from ~url_of_id is left as it
+is, but a Not_competent from ~channel_of_url is converted to
+Not_resolvable. So only ~url_of_id decides which URLs are
+accepted by the resolver and which not.
+
+
+The function ~channel_of_url must return
+None as encoding if the default mechanism to recognize the
+encoding should be used. It must return Some e if it is
+already known that the encoding of the channel is e .
+
+
+If ~auto_close = true (which is the default), the channel is
+closed after use. If ~auto_close = false , the channel is
+left open.
+
+
+Objects of this class contain a base URL relative to which relative URLs are
+interpreted. When creating a new object, you can specify the base URL by
+passing it as ~base_url argument. When an existing object is
+cloned, the base URL of the clone is the URL of the original object. - Note
+that the term "base URL" has a strict definition in RFC 1808.
+
+
+
+
+class resolve_read_this_string :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ string ->
+ resolver
+
+Reads from the passed string. If the ~id argument is passed
+to the object, the created resolver accepts only this ID. Otherwise all IDs are
+accepted. - Once the resolver has been cloned, it does not accept any ID. This
+means that this resolver cannot handle inner references to external
+entities. Note that you can combine this resolver with another resolver that
+can handle inner references (such as resolve_as_file); see class 'combine'
+below. - If you pass the ~fixenc argument, the encoding of
+the string is set to the passed value, regardless of any auto-recognition or
+any XML declaration.
+
+
+
+class resolve_read_any_string :
+ string_of_id:(ext_id -> (string * encoding option)) ->
+ resolver
+
+This resolver calls the function ~string_of_id to get the
+string for the passed ext_id . This function must either
+return the string and the encoding, or it must fail with Not_competent. The
+function must return None as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+Some e if it is already known that the encoding of the
+string is e .
+
+
+
+
+class resolve_as_file :
+ ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?system_encoding:encoding ->
+ ?url_of_id:(ext_id -> Neturl.url) ->
+ ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+ unit ->
+ resolver
+Reads from the local file system. Every file name is interpreted as
+file name of the local file system, and the referred file is read.
+
+
+The full form of a file URL is: file://host/path, where
+'host' specifies the host system where the file identified by 'path'
+resides. host = "" or host = "localhost" are accepted; other values
+will raise Not_competent. The standard for file URLs is
+defined in RFC 1738.
+
+
+Option ~file_prefix : Specifies how the "file:" prefix of
+file names is handled:
+
+
+ `Not_recognized: The prefix is not
+recognized.
+
+
+ `Allowed: The prefix is allowed but
+not required (the default).
+
+
+ `Required: The prefix is
+required.
+
+
+
+
+Option ~host_prefix: Specifies how the "//host" phrase of
+file names is handled:
+
+
+ `Not_recognized: The prefix is not
+recognized.
+
+
+ `Allowed: The prefix is allowed but
+not required (the default).
+
+
+ `Required: The prefix is
+required.
+
+
+
+
+Option ~system_encoding: Specifies the encoding of file
+names of the local file system. Default: UTF-8.
+
+
+Options ~url_of_id , ~channel_of_url : Not
+for the casual user!
+
+
+
+
+class combine :
+ ?prefer:resolver ->
+ resolver list ->
+ resolver
+
+Combines several resolver objects. If a concrete entity with an
+ext_id is to be opened, the combined resolver tries the
+contained resolvers in turn until a resolver accepts opening the entity
+(i.e. it does not raise Not_competent on open_in).
+
+
+Clones: If the 'clone' method is invoked before 'open_in', all contained
+resolvers are cloned separately and again combined. If the 'clone' method is
+invoked after 'open_in' (i.e. while the resolver is open), additionally the
+clone of the active resolver is flagged as being preferred, i.e. it is tried
+first.
+
+
+
+
+
+
+ The DTD classes Sorry, not yet
+written. Perhaps the interface definition of Pxp_dtd expresses the same:
+
+
+&markup-dtd1.mli;&markup-dtd2.mli;
+
+
+
+
+ Invoking the parser
+
+ Here is a description of Pxp_yacc.
+
+
+ Defaults
+ The following defaults are available:
+
+
+val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec
+
+
+
+
+
+ Parsing functions
+ In the following, the term "closed document" refers to
+an XML structure like
+
+
+<!DOCTYPE ... [ declarations ] >
+<root >
+...
+</root >
+
+
+The term "fragment" refers to an XML structure like
+
+
+<root >
+...
+</root >
+
+
+i.e. only to one isolated element instance.
+
+
+
+val parse_dtd_entity : config -> source -> dtd
+
+Parses the declarations which are contained in the entity, and returns them as
+dtd object.
+
+
+
+val extract_dtd_from_document_entity : config -> source -> dtd
+
+Extracts the DTD from a closed document. Both the internal and the external
+subsets are extracted and combined to one dtd object. This
+function does not parse the whole document, but only the parts that are
+necessary to extract the DTD.
+
+
+
+val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document
+
+Parses a closed document and validates it against the DTD that is contained in
+the document (internal and external subsets). The option
+~transform_dtd can be used to transform the DTD in the
+document, and to use the transformed DTD for validation. If
+~id_index is specified, an index of all ID attributes is
+created.
+
+
+
+
+val parse_wfdocument_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document
+
+Parses a closed document, but checks it only for well-formedness.
+
+
+
+
+val parse_content_entity :
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ dtd ->
+ 'ext spec ->
+ 'ext node
+
+Parses a fragment, and validates the element.
+
+
+
+
+val parse_wfcontent_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext node
+
+Parses a fragment, but checks it only for well-formedness.
+
+
+
+
+ Configuration options
+
+
+
+
+
+ warner: The parser prints
+warnings by invoking the method warn for this warner
+object. (Default: all warnings are dropped)
+
+ errors_with_line_numbers: If
+true, errors contain line numbers; if false, errors contain only byte
+positions. The latter mode is faster. (Default: true)
+
+ enable_pinstr_nodes: If true,
+the parser creates extra nodes for processing instructions. If false,
+processing instructions are simply added to the element or document surrounding
+the instructions. (Default: false)
+
+ enable_super_root_node: If
+true, the parser creates an extra node which is the parent of the root of the
+document tree. This node is called super root; it is an element with type
+T_super_root . - If there are processing instructions outside
+the root element and outside the DTD, they are added to the super root instead
+of the document. - If false, the super root node is not created. (Default:
+false)
+
+ enable_comment_nodes: If true,
+the parser creates nodes for comments with type T_comment ;
+if false, such nodes are not created. (Default: false)
+
+ encoding: Specifies the
+internal encoding of the parser. Most strings are then represented according to
+this encoding; however there are some exceptions (especially
+ext_id values which are always UTF-8 encoded).
+(Default: `Enc_iso88591)
+
+
+recognize_standalone_declaration: If true and if the parser is
+validating, the standalone="yes" declaration forces that it
+is checked whether the document is a standalone document. - If false, or if the
+parser is in well-formedness mode, such declarations are ignored.
+(Default: true)
+
+
+ store_element_positions: If
+true, for every non-data node the source position is stored. If false, the
+position information is lost. If available, you can get the positions of nodes
+by invoking the position method.
+(Default: true)
+
+ idref_pass: If true and if
+there is an ID index, the parser checks whether every IDREF or IDREFS attribute
+refers to an existing node; this requires that the parser traverses the whole
+document tree. If false, this check is left out. (Default: false)
+
+ validate_by_dfa: If true and if
+the content model for an element type is deterministic, a deterministic finite
+automaton is used to validate whether the element contents match the content
+model of the type. If false, or if a DFA is not available, a backtracking
+algorithm is used for validation. (Default: true)
+
+
+
+accept_only_deterministic_models: If true, only deterministic content
+models are accepted; if false, any syntactically correct content models can be
+processed. (Default: true)
+
+
+
+
+
+ Which configuration should I use?
+ First, I recommend varying the default configuration instead of
+creating a new configuration record. For instance, to set
+idref_pass to true , change the default
+as in:
+
+let config = { default_config with idref_pass = true }
+
+The background is that I can add more options to the record in future versions
+of the parser without breaking your programs.
+
+
+ Do I need extra nodes for processing instructions?
+By default, such nodes are not created. This does not mean that the
+processing instructions are lost; however, you cannot find out the exact
+location where they occur. For example, the following XML text
+
+
+<x><?pi1?><y/><?pi2?></x>
+
+will normally create one element node for x containing
+one subnode for y . The processing
+instructions are attached to x in a separate hash table; you
+can access them using x # pinstr "pi1" and x #
+pinstr "pi2" , respectively. The information about where the
+instructions occur within x is lost.
+
+
+
+ If the option enable_pinstr_nodes is
+turned on, the parser creates extra nodes pi1 and
+pi2 such that the subnodes of x are now:
+
+
+
+The extra nodes contain the processing instructions in the usual way, i.e. you
+can access them using pi1 # pinstr "pi1" and pi2 #
+pinstr "pi2" , respectively.
+
+
+ Note that you will need an exemplar for the PI nodes (see
+make_spec_from_alist ).
+
+
+ Do I need a super root node?
+ By default, there is no super root node. The
+document object refers directly to the node representing the
+root element of the document, i.e.
+
+
+
+if r is the root node. This is sometimes inconvenient: (1)
+Some algorithms become simpler if every node has a parent, even the root
+node. (2) Some standards such as XPath call the "root node" the node whose
+child represents the root of the document. (3) The super root node can serve
+as a container for processing instructions outside the root element. Because of
+these reasons, it is possible to create an extra super root node, whose child
+is the root node:
+
+
+
+When extra nodes are also created for processing instructions, these nodes can
+be added to the super root node if they occur outside the root element (reason
+(3)), and the order reflects the order in the source text.
+
+
+ Note that you will need an exemplar for the super root node
+(see make_spec_from_alist ).
+
+
+ What is the effect of the UTF-8 encoding?
+ By default, the parser represents strings (with few
+exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
+and fonts for this encoding.
+
+ However, internationalization may require that you switch over
+to UTF-8 encoding. In most environments, the immediate effect will be that you
+cannot read strings with character codes >= 160 any longer; your terminal will
+only show funny glyph combinations. It is strongly recommended to install
+Unicode fonts (GNU Unifont ,
+
+Markus Kuhn's fonts ) and terminal emulators
+that can handle UTF-8 byte sequences . Furthermore, a Unicode editor may
+be helpful (such as Yudit ). There is
+also a FAQ by
+Markus Kuhn.
+
+ By setting encoding to
+`Enc_utf8 all strings originating from the parsed XML
+document are represented as UTF-8 strings. This includes not only character
+data and attribute values but also element names, attribute names and so on, as
+it is possible to use any Unicode letter to form such names. Strictly
+speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
+will have difficulties when validating documents containing
+non-ISO-8859-1-names.
+
+
+ This mode does not have any impact on the external
+representation of documents. The character set assumed when reading a document
+is set in the XML declaration, and the character set used when writing a document must
+be passed to the write method.
+
+
+
+ How do I check that the nodes referred to by IDREF attributes exist?
+ First, you must create an index of all occurring ID
+attributes:
+
+
+
+This index must be passed to the parsing function:
+
+ index)
+ config source spec
+]]>
+
+Next, you must turn on the idref_pass mode:
+
+
+
+Note that now the whole document tree will be traversed, and every node will be
+checked for IDREF and IDREFS attributes. If the tree is big, this may take some
+time.
+
+
+
+
+ What are deterministic content models?
+ This type of model can speed up the validation checks;
+furthermore it ensures SGML-compatibility. In particular, a content model is
+deterministic if the parser can determine the actually used alternative by
+inspecting only the current token. For example, this element has
+non-deterministic contents:
+
+
+<!ELEMENT x ((u,v) | (u,y+) | v)>
+
+If the first element in x is u , the
+parser does not know which of the alternatives (u,v) or
+(u,y+) will work; the parser must also inspect the second
+element to be able to distinguish between the alternatives. Because such
+look-ahead (or "guessing") is required, this example is
+non-deterministic.
+
+
+ The XML standard demands that content models must be
+deterministic. So it is recommended to turn the option
+accept_only_deterministic_models on; however, PXP can also
+process non-deterministic models using a backtracking algorithm.
+
+ Deterministic models ensure that validation can be performed in
+linear time. In order to get the maximum benefits, PXP also implements a
+special validator that profits from deterministic models; this is the
+deterministic finite automaton (DFA). This validator is enabled per element
+type if the element type has a deterministic model and if the option
+validate_by_dfa is turned on.
+
+ In general, I expect that the DFA method is faster than the
+backtracking method; especially in the worst case the DFA takes only linear
+time. However, if the content model has only few alternatives and the
+alternatives do not nest, the backtracking algorithm may be better.
+
+
+
+
+
+
+
+
+ Updates
+
+ Some features (often added later) that are not otherwise
+explained in the manual but are worth mentioning.
+
+
+ Methods node_position, node_path, nth_node,
+previous_node, next_node for nodes: See pxp_document.mli
+
+ Functions to determine the document order of nodes:
+compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig
new file mode 100644
index 000000000..445095f07
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig
@@ -0,0 +1,47 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2250 229 229 1575 2250 1800 2295
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 3375 225 225 1575 3375 1800 3375
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 675 3375 229 229 675 3375 900 3420
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2475 3375 229 229 2475 3375 2700 3420
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 2475 180 180 3600 2475 3780 2475
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 2880 2475 180 180 2880 2475 3060 2475
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 4320 2475 186 186 4320 2475 4500 2520
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 1485 186 186 3600 1485 3780 1530
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 675 3150 1395 2385
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1575 2475 1575 3150
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1755 2385 2475 3150
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1537 2010 3412 1462
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3412 1537 1672 2047
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 810 3195 2707 2512
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1740 3217 3442 2580
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 2640 3210 4177 2610
+4 0 0 80 0 14 12 0.0000 4 75 105 3555 1530 x\001
+4 0 0 80 0 14 12 0.0000 4 75 105 1530 2295 n\001
+4 0 0 80 0 12 12 0.2967 4 135 1365 1658 1950 n # extension\001
+4 0 0 80 0 12 12 0.2967 4 135 840 2475 1950 x # node\001
+4 0 0 80 0 16 12 0.0000 4 135 1140 1020 4050 The node tree\001
+4 0 0 80 0 16 12 0.0000 4 135 1245 3225 3285 The extensions\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig
new file mode 100644
index 000000000..071683488
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig
@@ -0,0 +1,107 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 1350 242 229 6141 1350 6379 1395
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 2250 242 229 6141 2250 6379 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 5426 2250 242 229 5426 2250 5665 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6856 2250 242 229 6856 2250 7094 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 7571 2925 242 229 7571 2925 7809 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8524 2925 242 229 8524 2925 8762 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8047 2250 242 229 8047 2250 8285 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 1350 242 229 1866 1350 2104 1395
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 2250 242 229 1866 2250 2104 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1151 2250 242 229 1151 2250 1390 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 2581 2250 242 229 2581 2250 2819 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3296 2925 242 229 3296 2925 3534 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 4249 2925 242 229 4249 2925 4487 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3772 2250 242 229 3772 2250 4010 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8325 1350 242 229 8325 1350 8563 1395
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 5910 1440 5402 2017
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 6109 1590 6101 2025
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 6307 1537 6697 2070
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 7832 2347 7602 2692
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 8150 2452 8349 2752
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 5490 2017 5958 1492
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 6164 2010 6173 1575
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 6768 2025 6355 1470
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 7673 2715 7880 2415
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 8412 2707 8222 2415
+2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
+ 6387 1372 8023 2017
+2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
+ 4950 900 9000 900 9000 3375 4950 3375 4950 900
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 1635 1440 1127 2017
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 1834 1590 1826 2025
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 2032 1537 2422 2070
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 3557 2347 3327 2692
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 3875 2452 4074 2752
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 1215 2017 1683 1492
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 1889 2010 1898 1575
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 2493 2025 2080 1470
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 3398 2715 3605 2415
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 4137 2707 3947 2415
+2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
+ 2112 1372 3748 2017
+2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
+ 675 900 4725 900 4725 3375 675 3375 675 900
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8197 1545 8055 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 8137 2025 8280 1590
+2 1 0 3 0 7 95 0 -1 0.000 0 0 -1 1 0 4
+ 2 1 2.00 120.00 180.00
+ 7875 1500 7620 1965 7845 1920 7485 2355
+4 0 0 95 0 14 13 0.0000 4 79 111 6094 1379 x\001
+4 0 0 95 0 14 13 0.0000 4 111 111 7991 2265 y\001
+4 0 0 95 0 14 13 0.0000 4 79 111 1819 1379 x\001
+4 0 0 95 0 14 13 0.0000 4 111 111 3716 2265 y\001
+4 0 0 95 0 12 12 0.0000 4 150 1470 6459 1335 x # add_node y\001
+4 0 0 95 0 12 12 0.0000 4 150 1470 2214 1365 x # add_node y\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig
new file mode 100644
index 000000000..ed1865f87
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig
@@ -0,0 +1,111 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6345 2700 229 229 6345 2700 6570 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 5895 3600 229 229 5895 3600 6120 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6795 3600 229 229 6795 3600 7020 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2610 4725 2610
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2745 4725 2745
+2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
+ 4500 2385 4950 2655 4500 2970
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2490 1905 2025 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2827 2002 3202 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2115 2475 2535 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3255 2505 2872 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6135 1905 5670 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6472 2002 6847 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 5760 2475 6180 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6900 2505 6517 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8160 1957 7860 2460
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8407 2032 8625 2520
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 7942 2467 8212 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 8685 2475 8467 1987
+2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
+ 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6382 2460 6382 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6307 2032 6307 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6180 2857 5880 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6427 2932 6645 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 5962 3367 6232 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6705 3375 6487 2887
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2737 2460 2737 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2662 2032 2662 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2535 2857 2235 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2782 2932 3000 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2317 3367 2587 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3060 3375 2842 2887
+4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 75 105 6285 2752 x\001
+4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
+4 0 0 80 0 12 12 0.0000 4 105 840 3690 2025 let x' =\001
+4 0 0 80 0 12 12 0.0000 4 150 1890 3690 2205 x # orphaned_clone\001
+4 0 0 80 0 14 12 0.0000 4 105 210 8235 1845 x'\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig
new file mode 100644
index 000000000..a9fc87eef
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig
@@ -0,0 +1,96 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 2550 2092 2865 2407
+2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
+ 2595 2362 2820 2137
+2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
+ 2595 2137 2820 2362
+-6
+6 1980 2430 3420 3870
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2535 2857 2235 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2782 2932 3000 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2317 3367 2587 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3060 3375 2842 2887
+-6
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2737 2460 2737 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2662 2032 2662 2467
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2610 4725 2610
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2745 4725 2745
+2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
+ 4500 2385 4950 2655 4500 2970
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2490 1905 2025 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2827 2002 3202 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2115 2475 2535 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3255 2505 2872 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6135 1905 5670 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6472 2002 6847 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 5760 2475 6180 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6900 2505 6517 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8160 1957 7860 2460
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8407 2032 8625 2520
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 7942 2467 8212 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 8685 2475 8467 1987
+2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
+ 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
+4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
+4 0 0 95 0 12 12 0.0000 4 135 1050 3960 2250 x # delete\001
+4 0 0 80 0 14 12 0.0000 4 75 105 8280 1845 x\001
+4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig
new file mode 100644
index 000000000..231e76da9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig
@@ -0,0 +1,35 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 2025 229 229 2025 2025 2250 2070
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1350 2025 225 225 1350 2025 1575 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2700 2025 225 225 2700 2025 2925 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 1125 225 225 2025 1125 2250 1125
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 1380 1800 1845 1275
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 1815 1207 1282 1815
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2055 1792 2055 1350
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 1980 1350 1980 1807
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2190 1297 2550 1867
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2602 1807 2220 1237
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 450 675 3150 675 3150 2475 450 2475 450 675
+4 0 0 100 0 12 10 0.0000 4 120 540 2377 1342 parent\001
+4 0 0 100 0 12 10 0.0000 4 105 810 645 1628 sub_nodes\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig
new file mode 100644
index 000000000..54965fe63
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig
@@ -0,0 +1,63 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 1665 2700 2835 3150
+2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
+ 2835 3150 2835 2700 1665 2700 1665 3150 2835 3150
+4 0 0 80 0 18 12 0.0000 4 135 930 1815 3015 "Cherries"\001
+-6
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2250 1125 225 225 2250 1125 2475 1125
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2025 225 225 1575 2025 1800 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2925 2025 225 225 2925 2025 3150 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 900 2925 242 242 900 2925 1125 3015
+2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
+ 1485 4275 1485 3825 315 3825 315 4275 1485 4275
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2085 1275 1582 1807
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2407 1297 2940 1800
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1417 2190 900 2692
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1740 2190 2257 2700
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 892 3180 892 3825
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 45 675 6525 675 6525 4950 45 4950 45 675
+3 3 0 1 0 7 100 0 -1 0.000 0 0 0 22
+ 2115 3645 2250 3600 2520 3555 2745 3510 2925 3555 3150 3690
+ 3375 3735 3600 3735 3825 3735 4140 3825 4140 4005 4005 4185
+ 3735 4230 3420 4185 3150 4230 2835 4275 2520 4230 2340 4140
+ 2115 4095 1980 4005 1980 3825 2025 3735
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+3 3 0 1 0 7 100 0 -1 0.000 0 0 0 17
+ 3465 1170 3645 1080 4050 1035 4320 1035 4545 1080 4770 1170
+ 5130 1215 5355 1350 5400 1530 5265 1665 4860 1710 4455 1710
+ 4095 1665 3780 1620 3555 1575 3420 1485 3420 1305
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000
+3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
+ 2475 1215 2655 1350 2970 1440 3240 1395 3420 1260
+ 0.000 -1.000 -1.000 -1.000 0.000
+3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
+ 1125 3060 1215 3397 1410 3607 1687 3727 2025 3720
+ 0.000 -1.000 -1.000 -1.000 0.000
+4 0 0 80 0 18 12 0.0000 4 180 1065 375 4125 "An orange"\001
+4 0 0 80 0 18 12 0.0000 4 90 315 750 2985 \001
+4 0 0 80 0 18 12 0.0000 4 135 315 1410 2085 \001
+4 0 0 80 0 18 12 0.0000 4 90 315 2790 2070 \001
+4 0 0 80 0 18 12 0.0000 4 90 315 2100 1200 \001
+4 0 0 100 0 16 12 0.0000 4 135 795 3600 1260 attributes:\001
+4 0 0 100 0 16 12 0.0000 4 180 1680 3600 1485 "att" -> Value "apple"\001
+4 0 0 100 0 16 12 0.0000 4 135 795 2250 3780 attributes:\001
+4 0 0 100 0 17 12 0.0000 4 180 5910 390 4725 An orange Cherries \001
+4 0 0 100 0 16 12 0.0000 4 180 1800 2250 4005 "att" -> Value "orange"\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent
new file mode 100644
index 000000000..e9fdfc35a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent
@@ -0,0 +1,364 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent
new file mode 100644
index 000000000..604918bd8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent
@@ -0,0 +1,376 @@
+
diff --git a/helm/DEVEL/pxp/pxp/examples/Makefile b/helm/DEVEL/pxp/pxp/examples/Makefile
new file mode 100644
index 000000000..934385757
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/Makefile
@@ -0,0 +1,22 @@
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C xmlforms CLEAN
+ $(MAKE) -C validate CLEAN
+ $(MAKE) -C readme CLEAN
+ $(MAKE) -C simple_transformation CLEAN
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ $(MAKE) -C xmlforms distclean
+ $(MAKE) -C validate distclean
+ $(MAKE) -C readme distclean
+ $(MAKE) -C simple_transformation distclean
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore b/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore
new file mode 100644
index 000000000..2395c1946
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore
@@ -0,0 +1,10 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/Makefile b/helm/DEVEL/pxp/pxp/examples/readme/Makefile
new file mode 100644
index 000000000..df5f6ed0d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/Makefile
@@ -0,0 +1,34 @@
+# make readme: make bytecode executable
+# make readme.opt: make native executable
+# make clean: remove intermediate files
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files
+# make install
+#----------------------------------------------------------------------
+
+BIN = /usr/local/bin
+
+.PHONY: readme
+readme:
+ $(MAKE) -f Makefile.code readme
+
+.PHONY: readme.opt
+readme.opt:
+ $(MAKE) -f Makefile.code readme.opt
+
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+ rm -f readme readme.opt
+
+.PHONY: install
+install:
+ cp readme $(BIN)
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code b/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code
new file mode 100644
index 000000000..0514ddf33
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code
@@ -0,0 +1,57 @@
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS = to_html.cmo to_text.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = readme.cma
+XARCHIVE = readme.cmxa
+NAME = readme
+REQUIRES = str pxp
+
+readme: $(ARCHIVE) main.cmo
+ ocamlfind ocamlc -o readme -custom -package "$(REQUIRES)" \
+ -linkpkg $(ARCHIVE) main.cmo
+
+readme.opt: $(XARCHIVE) main.cmx
+ ocamlfind ocamlopt -o readme.opt -custom -package "$(REQUIRES)" \
+ -linkpkg $(XARCHIVE) main.cmx
+
+$(ARCHIVE): $(OBJECTS)
+ $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+ $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+ $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+*.mli:
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/main.ml b/helm/DEVEL/pxp/pxp/examples/readme/main.ml
new file mode 100644
index 000000000..4e3837aa9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/main.ml
@@ -0,0 +1,108 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+
+
+(* Print a human-readable description of the exception [e] on stderr.
+ * The text is produced by [string_of_exn], which is not defined in this
+ * file (presumably it comes from one of the opened Pxp modules).
+ * The function is not recursive, so it is defined without 'rec'.
+ *)
+let print_error e =
+  prerr_endline (string_of_exn e)
+;;
+
+
+(* [run f a] applies [f] to [a] and returns its result.
+ * Any exception raised by [f] is reported with [print_error]; the program
+ * then terminates with exit status 1 so that the caller (shell, make) can
+ * detect the failed conversion.  This is the same status the "no input"
+ * path of [main] uses.
+ *)
+let run f a =
+  try f a with
+    e -> print_error e; exit 1
+;;
+
+
+(* [convert_to_html filename] parses the XML document stored in the file
+ * [filename] and writes its HTML rendering to stdout.
+ * The document tree is built with the node specification
+ * [To_html.tag_map], so every node carries a To_html extension object
+ * whose [to_html] method does the actual output.
+ *)
+let convert_to_html filename =
+ (* parse the input document; the parser configuration selects
+  * `Enc_iso88591 as encoding (presumably the internal string
+  * representation -- confirm against the Pxp_yacc documentation)
+  *)
+ let document =
+ parse_document_entity
+ { default_config with encoding = `Enc_iso88591 }
+ (from_file filename)
+ To_html.tag_map
+ in
+ let root = document # root in
+ (* the store collects the footnotes allocated during the conversion *)
+ let store = new To_html.store in
+ root # extension # to_html store stdout
+;;
+
+
+(* [convert_to_text filename] parses the XML document stored in the file
+ * [filename] and writes a plain-text rendering to stdout.
+ * The document tree is built with the node specification
+ * [To_text.tag_map]; the root node formats the whole document into a
+ * [To_text.box], which is printed afterwards.
+ *)
+let convert_to_text filename =
+ (* parse the input document with the default parser configuration *)
+ let document =
+ parse_document_entity
+ default_config
+ (from_file filename)
+ To_text.tag_map
+ in
+ let root = document # root in
+ let store = new To_text.store in
+ (* 79 columns are available on the first line and on all other lines *)
+ let box = new To_text.box 79 79 in
+ root # extension # to_box store box;
+ (* print without indentation (initial_indent = 0, indent = 0) *)
+ box # output 0 0 stdout
+;;
+
+
+(* Command-line entry point.
+ *
+ * Recognized options are -html and -text; exactly one of them must be
+ * given, together with exactly one input file name.  The converted
+ * document is written to stdout.
+ *
+ * The program exits with status 1 when no input file is named or when
+ * the output format is not selected unambiguously.  A second file name
+ * is rejected by raising [Arg.Bad] from the anonymous-argument handler.
+ *)
+let main() =
+  let want_html = ref false in
+  let want_text = ref false in
+  let filename = ref None in
+  Arg.parse
+      [ "-html", Arg.Set want_html,
+             " convert file to html";
+        "-text", Arg.Set want_text,
+             " convert file to text";
+      ]
+      (fun s ->
+         match !filename with
+             None -> filename := Some s
+           | Some _ ->
+               raise (Arg.Bad "Multiple arguments not allowed."))
+      "usage: readme [ -text | -html ] input.xml >output";
+  let fn =
+    match !filename with
+        None ->
+          prerr_endline "readme: no input";
+          exit 1
+      | Some s -> s
+  in
+  match !want_html, !want_text with
+      true, false ->
+        run convert_to_html fn
+    | false, true ->
+        run convert_to_text fn
+    | _ ->
+        (* Neither or both formats were requested.  This is a usage error,
+         * so signal failure to the caller like the "no input" case does.
+         *)
+        prerr_endline ("readme: Please select exactly one output format");
+        exit 1
+;;
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/07/08 17:58:17 gerd
+ * Updated because of PXP API changes.
+ *
+ * Revision 1.4 2000/06/04 20:25:38 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.3 2000/05/01 16:46:40 gerd
+ * Using the new error formatter.
+ *
+ * Revision 1.2 1999/08/23 16:54:19 gerd
+ * Minor changes.
+ *
+ * Revision 1.1 1999/08/22 22:29:32 gerd
+ * Initial revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd b/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd
new file mode 100644
index 000000000..8ff6a9f75
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml b/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml
new file mode 100644
index 000000000..f717b2259
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml
@@ -0,0 +1,432 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(*$ readme.code.header *)
+open Pxp_types
+open Pxp_document
+(*$-*)
+
+
+(*$ readme.code.footnote-printer *)
+class type footnote_printer =
+ object
+ method footnote_to_html : store_type -> out_channel -> unit
+ end
+
+and store_type =
+ object
+ method alloc_footnote : footnote_printer -> int
+ method print_footnotes : out_channel -> unit
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.store *)
+class store =
+ object (self)
+
+ val mutable footnotes = ( [] : (int * footnote_printer) list )
+ val mutable next_footnote_number = 1
+
+ method alloc_footnote n =
+ let number = next_footnote_number in
+ next_footnote_number <- number+1;
+ footnotes <- footnotes @ [ number, n ];
+ number
+
+ method print_footnotes ch =
+ if footnotes <> [] then begin
+ output_string ch " \n";
+ output_string ch "\n";
+ List.iter
+ (fun (_,n) ->
+ n # footnote_to_html (self : #store_type :> store_type) ch)
+ footnotes;
+ output_string ch " \n";
+ end
+
+ end
+;;
+(*$-*)
+
+
+
+(*$ readme.code.escape-html *)
+(* [escape_html s] returns a copy of [s] in which the four characters
+ * that are significant in HTML text and attribute values are replaced
+ * by character entities:
+ *   <  becomes  &lt;      >  becomes  &gt;
+ *   &  becomes  &amp;     "  becomes  &quot;
+ * All other characters are copied unchanged.  The regular expression
+ * matches only these four characters, so the last case of the match is
+ * unreachable.
+ *)
+let escape_html s =
+  Str.global_substitute
+    (Str.regexp "<\\|>\\|&\\|\"")
+    (fun s ->
+      match Str.matched_string s with
+        "<" -> "&lt;"
+      | ">" -> "&gt;"
+      | "&" -> "&amp;"
+      | "\"" -> "&quot;"
+      | _ -> assert false)
+    s
+;;
+(*$-*)
+
+
+(*$ readme.code.shared *)
+(* Common base class of all extension objects of this module.  It keeps
+ * the reference to the document node the extension is attached to and
+ * declares the conversion method [to_html] that every concrete
+ * extension class has to define.
+ *)
+class virtual shared =
+ object (self)
+
+ (* --- default_ext --- *)
+
+ (* the node this extension belongs to; None until [set_node] is called *)
+ val mutable node = (None : shared node option)
+
+ (* returns a copy of this object *)
+ method clone = {< >}
+ (* returns the attached node; it is a programming error (assert false)
+  * to call this method before [set_node]
+  *)
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- virtual --- *)
+
+ (* to_html store ch: writes the HTML rendering of the node to the
+  * channel [ch]; [store] is the footnote store of the conversion
+  *)
+ method virtual to_html : store -> out_channel -> unit
+
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.only-data *)
+(* Extension for nodes that are rendered by their character data only:
+ * [to_html] writes the data string of the node, escaped by
+ * [escape_html], and ignores [store].
+ *)
+class only_data =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch (escape_html (self # node # data))
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.no-markup *)
+(* Extension for elements that produce no markup of their own:
+ * [to_html] only converts the sub nodes, in document order, by calling
+ * the [to_html] method of their extension objects.
+ *)
+class no_markup =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes)
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.readme *)
+class readme =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ (* output header *)
+ output_string
+ ch "";
+ output_string
+ ch "\n";
+ let title =
+ match self # node # attribute "title" with
+ Value s -> s
+ | _ -> assert false
+ in
+ let html_header, _ =
+ try (self # node # dtd # par_entity "readme:html:header")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_trailer, _ =
+ try (self # node # dtd # par_entity "readme:html:trailer")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_bgcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:bgcolor")
+ # replacement_text
+ with WF_error _ -> "white", false in
+ let html_textcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:textcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_alinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:alinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_vlinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_linkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:linkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_background, _ =
+ try (self # node # dtd # par_entity "readme:html:background")
+ # replacement_text
+ with WF_error _ -> "", false in
+
+ output_string ch "\n";
+ output_string ch (escape_html title);
+ output_string ch " \n";
+ output_string ch "
+ if value <> "" then
+ output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+ [ "bgcolor", html_bgcolor;
+ "text", html_textcolor;
+ "link", html_linkcolor;
+ "alink", html_alinkcolor;
+ "vlink", html_vlinkcolor;
+ ];
+ output_string ch ">\n";
+ output_string ch html_header;
+ output_string ch "";
+ output_string ch (escape_html title);
+ output_string ch " \n";
+ (* process main content: *)
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ (* now process footnotes *)
+ store # print_footnotes ch;
+ (* trailer *)
+ output_string ch html_trailer;
+ output_string ch "\n";
+
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.section *)
+(* Extension for section elements.  [the_tag] is the name of the HTML
+ * heading element (for example "h1") that is used for the section title.
+ *
+ * [to_html] expects the first sub node to be the title: it is written
+ * between the opening tag <the_tag> and the closing tag </the_tag>.
+ * The remaining sub nodes are converted after the heading.  A section
+ * without any sub node is treated as a programming error (assert false);
+ * presumably the DTD guarantees the title -- confirm against readme.dtd.
+ *)
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_html store ch =
+      let sub_nodes = self # node # sub_nodes in
+      match sub_nodes with
+          title_node :: rest ->
+            output_string ch ("<" ^ tag ^ ">\n");
+            title_node # extension # to_html store ch;
+            (* the closing tag needs the "</" prefix to be valid HTML *)
+            output_string ch ("\n</" ^ tag ^ ">");
+            List.iter
+              (fun n -> n # extension # to_html store ch)
+              rest
+        | _ ->
+            assert false
+  end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;
+(*$-*)
+
+
+(*$ readme.code.map-tag *)
+(* Extension for elements that are mapped one-to-one to an HTML element.
+ * [the_target_tag] is the name of that HTML element.
+ *
+ * [to_html] writes the opening tag <the_target_tag>, converts all sub
+ * nodes in document order, and writes the closing tag </the_target_tag>.
+ *)
+class map_tag the_target_tag =
+  object (self)
+    inherit shared
+
+    val target_tag = the_target_tag
+
+    method to_html store ch =
+      output_string ch ("<" ^ target_tag ^ ">\n");
+      List.iter
+        (fun n -> n # extension # to_html store ch)
+        (self # node # sub_nodes);
+      (* the closing tag needs the "</" prefix to be valid HTML *)
+      output_string ch ("\n</" ^ target_tag ^ ">");
+  end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;
+(*$-*)
+
+
+(*$ readme.code.br *)
+(* Extension for the "br" element: [to_html] writes an HTML line break
+ * followed by a newline, and then converts the sub nodes (if any) in
+ * document order.
+ * NOTE(review): the tag literal was restored from the class name because
+ * the string contained only a blank here -- confirm against the
+ * upstream PXP sources.
+ *)
+class br =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "<br>\n";
+      List.iter
+        (fun n -> n # extension # to_html store ch)
+        (self # node # sub_nodes);
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.code *)
+class code =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ let data = self # node # data in
+ (* convert tabs *)
+ let l = String.length data in
+ let rec preprocess i column =
+ (* this is very ineffective but comprehensive: *)
+ if i < l then
+ match data.[i] with
+ '\t' ->
+ let n = 8 - (column mod 8) in
+ String.make n ' ' ^ preprocess (i+1) (column + n)
+ | '\n' ->
+ "\n" ^ preprocess (i+1) 0
+ | c ->
+ String.make 1 c ^ preprocess (i+1) (column + 1)
+ else
+ ""
+ in
+ output_string ch "
";
+ output_string ch (escape_html (preprocess 0 0));
+ output_string ch "
";
+
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.a *)
+class a =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch " escape_html v
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ begin match self # node # attribute "readmeref" with
+ Value v -> escape_html v ^ ".html"
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ ""
+ end
+ in
+ if href <> "" then
+ output_string ch ("href=\"" ^ href ^ "\"");
+ output_string ch ">";
+ output_string ch (escape_html (self # node # data));
+ output_string ch " ";
+
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.footnote *)
+class footnote =
+ object (self)
+ inherit shared
+
+ val mutable footnote_number = 0
+
+ method to_html store ch =
+ let number =
+ store # alloc_footnote (self : #shared :> footnote_printer) in
+ let foot_anchor =
+ "footnote" ^ string_of_int number in
+ let text_anchor =
+ "textnote" ^ string_of_int number in
+ footnote_number <- number;
+ output_string ch ( "[" ^ string_of_int number ^
+ "] " )
+
+ method footnote_to_html store ch =
+ (* prerequisite: we are in a definition list ... *)
+ let foot_anchor =
+ "footnote" ^ string_of_int footnote_number in
+ let text_anchor =
+ "textnote" ^ string_of_int footnote_number in
+ output_string ch ("[" ^ string_of_int footnote_number ^
+ "] \n");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n ")
+
+ end
+;;
+(*$-*)
+
+
+(**********************************************************************)
+
+(*$ readme.code.tag-map *)
+open Pxp_yacc
+
+(* The node specification passed to the parser for HTML conversion.  It
+ * determines which exemplar object (and thus which extension class of
+ * this module) is used for the nodes of the parsed document:
+ *  - data nodes use [only_data],
+ *  - elements listed in the association list use the class given there,
+ *  - all other elements use [no_markup].
+ * The [footnote] object is coerced to [shared] because it has an
+ * additional method ([footnote_to_html]) and all entries of the list
+ * must have the same type.
+ *)
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new em));
+ "ul", (new element_impl (new ul));
+ "li", (new element_impl (new li));
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a));
+ ]
+ ()
+;;
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/08/22 14:34:25 gerd
+ * Using make_spec_from_alist instead of make_spec_from_mapping.
+ *
+ * Revision 1.5 2000/08/18 21:15:14 gerd
+ * Update because of PXP API change: par_entity raises WF_error
+ * instead of Validation error if the entity is not defined.
+ * Further minor updates.
+ *
+ * Revision 1.4 2000/07/08 17:58:17 gerd
+ * Updated because of PXP API changes.
+ *
+ * Revision 1.3 2000/06/04 20:25:38 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 1999/09/12 20:09:32 gerd
+ * Added section marks.
+ *
+ * Revision 1.1 1999/08/22 22:29:32 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml b/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml
new file mode 100644
index 000000000..fc45f45cd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml
@@ -0,0 +1,599 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+
+
+(**********************************************************************)
+(* The box class represents formatted text *)
+(**********************************************************************)
+
+class type formatted_text =
+ object
+ method output : int -> int -> out_channel -> unit
+ (* output initial_indent indent ch:
+ * 'initial_indent' is how far the first line should be indented;
+ * 'indent' how far the rest. 'ch' is the channel on which the lines
+ * are to be printed.
+ *)
+
+ method multiline : bool
+ (* whether the box occupies multiple lines *)
+
+ method width_of_last_line : int
+ (* returns the width of the last line *)
+ end
+;;
+
+
+type text =
+ Text of string
+ | Box of formatted_text
+;;
+
+
+(* [textwidth tl] computes the width of the last output line that the
+ * items of [tl] occupy.  [tl] holds the items of one line in reverse
+ * order (last item first), which is how the box class stores them.
+ *
+ * The items are visited in output order.  A string adds its length.  A
+ * single-line box adds the width of its only line.  A multi-line box
+ * ends the previous output line, so the width restarts at the width of
+ * the last line of that box.
+ *)
+let textwidth tl =
+  (* width after appending the item [item] to a line of width [w] *)
+  let extend w item =
+    match item with
+      Text s ->
+        w + String.length s
+    | Box b ->
+        if b # multiline then
+          b # width_of_last_line
+        else
+          w + b # width_of_last_line
+  in
+  List.fold_left extend 0 (List.rev tl)
+;;
+
+
+(* A [box] collects words, spaces and nested boxes, breaks them into
+ * lines of bounded width, and prints the result.  It provides the
+ * methods of the class type [formatted_text] ([output], [multiline],
+ * [width_of_last_line]), so boxes can be nested.
+ *
+ * the_initial_width: the width that is available on the first line
+ * the_width:         the width that is available on all other lines
+ *)
+class box the_initial_width the_width =
+  object (self)
+
+    (* The 'initial_width' is the width that is available on the first
+     * line of output; the 'width' is the width that is available in the
+     * rest.
+     *)
+
+    val initial_width = the_initial_width
+    val width = the_width
+
+    (* state: *)
+
+    val mutable space_added = false
+      (* whether the current line ends with a space (or a space must be
+       * suppressed at this position)
+       *)
+    val mutable linefeed_added = false
+      (* same for linefeeds, which are represented as spaces *)
+    val mutable is_first_line = true
+    val mutable lines = []
+      (* lines in reverse order (first line = last element) *)
+    val mutable current_line = []
+      (* not member of 'lines'; again reverse order *)
+    val mutable current_indent = 0
+
+    (* Appends one space to the current line unless a space has already
+     * been added (or is being ignored) at this position.
+     *)
+    method add_space =
+      if not space_added then begin
+        space_added <- true;
+        linefeed_added <- true;
+        current_line <- Text " " :: current_line
+      end
+
+    (* Suppresses spaces and linefeeds until the next word or box. *)
+    method ignore_space =
+      space_added <- true;
+      linefeed_added <- true
+
+    (* Adds a linefeed, which is rendered as a single space.  Nothing is
+     * appended if a linefeed or a space is already present here.
+     *)
+    method add_linefeed =
+      if not linefeed_added then begin
+        linefeed_added <- true;
+        if not space_added then
+          current_line <- Text " " :: current_line
+      end
+
+    (* Suppresses linefeeds until the next word or box. *)
+    method ignore_linefeed =
+      linefeed_added <- true
+
+    (* Finishes the current line and starts a new, empty one.  Spaces and
+     * linefeeds directly after the line break are suppressed.
+     *)
+    method add_newline =
+      lines <- current_line :: lines;
+      current_line <- [];
+      space_added <- true;
+      linefeed_added <- true;
+      is_first_line <- false;
+      current_indent <- 0;
+
+    (* Appends the word [s].  If the current line would become wider than
+     * the available width, the line is finished and [s] becomes the
+     * first item of the next line.
+     *)
+    method add_word s =
+      (* first try to add 's' to 'current_line' *)
+      let current_line' = Text s :: current_line in
+      let current_width =
+        if is_first_line then initial_width else width in
+      if textwidth current_line' + current_indent <= current_width then begin
+        (* ok, the line does not become too long *)
+        current_line <- current_line';
+        space_added <- false;
+        linefeed_added <- false
+      end
+      else begin
+        (* The line would be too long. *)
+        lines <- current_line :: lines;
+        current_line <- [Text s];
+        space_added <- false;
+        linefeed_added <- false;
+        is_first_line <- false;
+        current_indent <- 0;
+      end
+
+    (* Appends the nested box [b] to the current line.  No width check is
+     * done here.
+     *)
+    method add_box b =
+      current_line <- Box b :: current_line;
+      space_added <- false;
+      linefeed_added <- false;
+
+
+    (* The width of the line that is currently being filled. *)
+    method width_of_last_line =
+      textwidth current_line + current_indent
+
+
+    (* The number of columns that are still free on the current line. *)
+    method available_width =
+      let current_width =
+        if is_first_line then initial_width else width in
+      current_width - textwidth current_line - current_indent
+
+
+    (* Whether the box occupies more than one line: either it has
+     * finished lines, or the current line contains a multi-line box.
+     * ('||' is used instead of the deprecated synonym 'or'.)
+     *)
+    method multiline =
+      lines <> [] ||
+      (List.exists
+         (function
+              Text _ -> false
+            | Box b -> b # multiline)
+         current_line)
+
+    (* output initial_indent indent ch: prints all lines to [ch].  The
+     * first line is indented by [initial_indent] spaces, all other lines
+     * by [indent] spaces.  Nested boxes are printed in place with initial
+     * indentation 0 and the same [indent].  Lines are separated by "\n";
+     * no newline is printed after the last line.
+     *)
+    method output initial_indent indent ch =
+      let eff_lines =
+        List.rev
+          (current_line :: lines) in
+      let rec out_lines cur_indent ll =
+        match ll with
+            [] -> ()
+          | l :: ll' ->
+              output_string ch (String.make cur_indent ' ');
+              List.iter
+                (function
+                     Text s ->
+                       output_string ch s
+                   | Box b ->
+                       b # output 0 indent ch
+                )
+                (List.rev l);
+              if ll' <> [] then
+                output_string ch "\n";
+              out_lines indent ll'
+      in
+      out_lines initial_indent eff_lines
+  end
+;;
+
+
+(* A box for one list item: the list mark is printed first, and the item
+ * text is shifted 'indent' columns to the right of the box's origin.
+ *)
+class listitem_box listmark indent totalwidth =
+  (* Blanks needed to pad the mark up to 'indent' columns. A value <= 0
+   * means the mark already fills the indentation, so the item text has
+   * to start on a fresh line.
+   *)
+  let padding = indent - String.length listmark in
+  object (self)
+    inherit box totalwidth (totalwidth - indent) as super
+
+    val extra_indent = indent
+
+    initializer
+      self # add_word listmark;
+      if padding > 0 then begin
+        current_line <- Text (String.make padding ' ') :: current_line;
+        (* suppress a following space/linefeed right after the padding *)
+        space_added <- true;
+        linefeed_added <- true
+      end
+      else
+        self # add_newline
+
+
+    method output initial_indent indent ch =
+      (* continuation lines are indented by the list indentation, too *)
+      super # output initial_indent (indent + extra_indent) ch
+  end
+;;
+
+
+(**********************************************************************)
+(* Footnotes etc. *)
+(**********************************************************************)
+
+
+(* Interface of an object that can render itself as a footnote: its only
+ * method receives the footnote store and the box to print into.
+ *)
+class type footnote_printer =
+ object
+ method footnote_to_box : store_type -> box -> unit
+ end
+
+(* Interface of the footnote registry: 'alloc_footnote' registers a printer
+ * and returns an int for it; 'print_footnotes' writes into the given box.
+ * The two class types are mutually recursive, hence the 'and'.
+ *)
+and store_type =
+ object
+ method alloc_footnote : footnote_printer -> int
+ method print_footnotes : box -> unit
+ end
+;;
+
+
+(* Registry of footnotes: numbers them in allocation order (starting at 1)
+ * and prints them below a separator rule.
+ *)
+class store =
+  object (self)
+
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+    val mutable next_footnote_number = 1
+
+    (* Registers printer 'n' and returns the number assigned to it. *)
+    method alloc_footnote n =
+      let number = next_footnote_number in
+      footnotes <- footnotes @ [ number, n ];
+      next_footnote_number <- number + 1;
+      number
+
+    (* Prints all registered footnotes into 'b'; does nothing if there
+     * are none.
+     *)
+    method print_footnotes (b : box) =
+      match footnotes with
+        [] -> ()
+      | pending ->
+          b # add_newline;
+          b # add_newline;
+          (* separator rule: a third of the available width *)
+          let rule_width = (b # available_width) / 3 in
+          b # add_word (String.make rule_width '-');
+          b # add_newline;
+          b # add_newline;
+          List.iter
+            (fun (_, printer) ->
+               printer # footnote_to_box (self : #store_type :> store_type) b)
+            pending;
+          b # add_newline
+  end
+;;
+
+
+
+(**********************************************************************)
+(* The extension objects *)
+(**********************************************************************)
+
+
+(* Common base class of all extension objects in this file. It stores the
+ * node the extension is attached to and declares the formatting method.
+ *)
+class virtual shared =
+  object (self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {< >}
+
+    (* The attached node; asking for it before 'set_node' was called is
+     * treated as a programming error.
+     *)
+    method node =
+      match node with
+        Some n -> n
+      | None -> assert false
+
+    method set_node n =
+      node <- Some n
+
+    (* --- virtual --- *)
+
+    method virtual to_box : store -> box -> unit
+      (* to_box store b:
+       * formats the element using box 'b'
+       *)
+  end
+;;
+
+
+(* Extension for data nodes: the character data is split into words,
+ * blank runs and single linefeeds, and each piece is handed to the box.
+ *)
+class only_data =
+  object (self)
+    inherit shared
+
+    val white_space_re = Str.regexp "[ \t]+\\|\n"
+
+    method to_box store b =
+      let emit = function
+          Str.Delim "\n" -> b # add_linefeed
+        | Str.Delim _    -> b # add_space
+        | Str.Text word  -> b # add_word word
+      in
+      List.iter emit (Str.full_split white_space_re (self # node # data))
+  end
+;;
+
+
+(* Extension for elements without formatting of their own: the sub nodes
+ * are formatted into the same box, in document order.
+ *)
+class no_markup =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      let children = self # node # sub_nodes in
+      List.iter (fun child -> child # extension # to_box store b) children
+  end
+;;
+
+
+(* Extension for the root element: prints the title between two rules of
+ * '*' characters, then the content, then the collected footnotes.
+ *)
+class readme =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      let title =
+        match self # node # attribute "title" with
+          Value s -> s
+        | _ -> assert false
+      in
+      let rule = String.make ((b # available_width) - 1) '*' in
+      (* one word followed by a line break *)
+      let put_line text =
+        b # add_word text;
+        b # add_newline
+      in
+      put_line rule;
+      put_line title;
+      put_line rule;
+      b # add_newline;
+      (* process main content: *)
+      List.iter
+        (fun child -> child # extension # to_box store b)
+        (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes b;
+      (* trailer *)
+      b # add_newline
+  end
+;;
+
+
+(* Extension for section elements. The first sub node supplies the title,
+ * which is printed between two rules made of the character 'the_tag';
+ * the remaining sub nodes are the section body.
+ *)
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_box store b =
+      match self # node # sub_nodes with
+        [] ->
+          (* a section without a title node is not expected *)
+          assert false
+      | title_node :: body ->
+          b # add_newline;
+          let rule = String.make ((b # available_width) - 1) tag in
+          (* one word followed by a line break *)
+          let put_line text =
+            b # add_word text;
+            b # add_newline
+          in
+          put_line rule;
+          put_line (title_node # data);
+          put_line rule;
+          List.iter
+            (fun child -> child # extension # to_box store b)
+            body
+  end
+;;
+
+(* The three section levels differ only in the character used to draw the
+ * rules around the title.
+ *)
+class sect1 = section '=';;
+class sect2 = section '-';;
+class sect3 = section ':';;
+
+
+(* Extension for paragraphs: the content is formatted into a sub box of
+ * the currently available width. Outside of list items the paragraph is
+ * preceded by a line break.
+ *)
+class p =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      begin match self # node # parent # node_type with
+          T_element "li" -> ()                 (* directly within a list item *)
+        | T_element _    -> b # add_newline
+        | _              -> assert false
+      end;
+      let w = b # available_width in
+      let inner = new box w w in
+      inner # ignore_space;
+      List.iter
+        (fun child -> child # extension # to_box store inner)
+        (self # node # sub_nodes);
+      b # add_box (inner :> formatted_text);
+      b # add_newline
+  end
+;;
+
+
+(* Extension for list items: the content goes into a 'listitem_box' with
+ * the mark "-" and an indentation of 3 columns.
+ *)
+class li =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      b # add_newline;
+      let item = new listitem_box "-" 3 (b # available_width) in
+      item # ignore_space;
+      List.iter
+        (fun child -> child # extension # to_box store item)
+        (self # node # sub_nodes);
+      b # add_box (item :> formatted_text)
+  end
+;;
+
+
+(* Extension for code listings: the data of the node is copied line by
+ * line into a sub box. Tabs are expanded to the next multiple of 8
+ * columns; every linefeed ends a line of the box.
+ *)
+class code =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      b # add_newline;
+      let w = b # available_width in
+      let b' = new box w w in
+      b' # ignore_space;
+      let data = self # node # data in
+      (* The current line is collected in a buffer instead of being
+       * rebuilt with '^' for every character (which is quadratic in the
+       * line length).
+       *)
+      let line = Buffer.create 80 in
+      let column = ref 0 in
+      let flush_line () =
+        b' # add_word (Buffer.contents line);
+        b' # add_newline;
+        Buffer.clear line;
+        column := 0
+      in
+      for i = 0 to String.length data - 1 do
+        match data.[i] with
+          '\t' ->
+            (* convert tabs: pad to the next tab stop *)
+            let n = 8 - (!column mod 8) in
+            Buffer.add_string line (String.make n ' ');
+            column := !column + n
+        | '\n' ->
+            (* an empty line is still added as an (empty) word *)
+            flush_line ()
+        | c ->
+            Buffer.add_char line c;
+            incr column
+      done;
+      (* last line without terminating linefeed *)
+      if Buffer.length line > 0 then flush_line ();
+      b # add_box (b' :> formatted_text);
+      b # add_newline
+  end
+;;
+
+
+(* Extension for line breaks: formatting the element just starts a new
+ * line in the box; sub nodes are not visited.
+ *)
+class br =
+ object (self)
+ inherit shared
+
+ method to_box store b =
+ b # add_newline;
+ end
+;;
+
+
+(* Extension for footnotes. In the running text only the reference mark
+ * "[n]" is printed; the footnote text is printed when the store calls
+ * 'footnote_to_box'.
+ *)
+class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_box store b =
+      (* register with the store and remember the assigned number *)
+      footnote_number <-
+        store # alloc_footnote (self : #shared :> footnote_printer);
+      b # add_space;
+      b # add_word ("[" ^ string_of_int footnote_number ^ "]")
+
+    method footnote_to_box store b =
+      let mark = "[" ^ string_of_int footnote_number ^ "]" in
+      let item = new listitem_box mark 6 (b # available_width) in
+      item # ignore_space;
+      List.iter
+        (fun child -> child # extension # to_box store item)
+        (self # node # sub_nodes);
+      b # add_box (item :> formatted_text);
+      b # add_newline;
+      b # add_newline
+
+  end
+;;
+
+
+(* Extension for anchors. The content is printed inline; if the element
+ * has an "href" or a "readmeref" attribute, a footnote naming the target
+ * is allocated and its mark "[n]" is appended.
+ *)
+class a =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+    val mutable a_href = ""
+
+    method to_box store b =
+      (* Footnote text for attribute 'attname', or None if it is implied *)
+      let described attname prefix =
+        match self # node # attribute attname with
+          Value v       -> Some (prefix ^ v)
+        | Valuelist _   -> assert false
+        | Implied_value -> None
+      in
+      let href =
+        match described "href" "see " with
+          Some text -> text
+        | None ->
+            (* "readmeref" is only consulted when "href" is missing *)
+            (match described "readmeref" "see file " with
+               Some text -> text
+             | None      -> "")
+      in
+      a_href <- href;
+      List.iter
+        (fun child -> child # extension # to_box store b)
+        (self # node # sub_nodes);
+      if href <> "" then begin
+        footnote_number <-
+          store # alloc_footnote (self : #shared :> footnote_printer);
+        b # add_space;
+        b # add_word ("[" ^ string_of_int footnote_number ^ "]")
+      end
+
+    method footnote_to_box store b =
+      if a_href <> "" then begin
+        let mark = "[" ^ string_of_int footnote_number ^ "]" in
+        let item = new listitem_box mark 6 (b # available_width) in
+        item # ignore_space;
+        item # add_word a_href;
+        b # add_box (item :> formatted_text);
+        b # add_newline;
+        b # add_newline
+      end
+  end
+;;
+
+(**********************************************************************)
+
+open Pxp_yacc
+
+(* The mapping from element names to exemplar nodes. Data nodes use
+ * 'only_data'; elements not listed here fall back to 'no_markup'.
+ * 'footnote' and 'a' have an extra method and are therefore coerced
+ * to 'shared' explicitly.
+ *)
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new no_markup));
+ "ul", (new element_impl (new no_markup));
+ "li", (new element_impl (new li));
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a : #shared :> shared));
+ ]
+ ()
+;;
+
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/08/22 14:34:25 gerd
+ * Using make_spec_from_alist instead of make_spec_from_mapping.
+ *
+ * Revision 1.4 2000/08/18 21:15:25 gerd
+ * Minor updates because of PXP API changes.
+ *
+ * Revision 1.3 2000/07/08 17:58:17 gerd
+ * Updated because of PXP API changes.
+ *
+ * Revision 1.2 2000/06/04 20:25:38 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.1 1999/08/22 22:29:32 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile b/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile
new file mode 100644
index 000000000..27be18c30
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile
@@ -0,0 +1,21 @@
+# Builds the three example programs as bytecode executables.
+all: print sort delcol
+
+# Each program is a single source file linked against the pxp package.
+print: print.ml
+ ocamlfind ocamlc -o print -package pxp -linkpkg -custom \
+ -predicates pxp_without_utf8 print.ml
+
+sort: sort.ml
+ ocamlfind ocamlc -o sort -package pxp -linkpkg -custom \
+ -predicates pxp_without_utf8 sort.ml
+
+delcol: delcol.ml
+ ocamlfind ocamlc -o delcol -package pxp -linkpkg -custom \
+ -predicates pxp_without_utf8 delcol.ml
+
+# Removes compiler output but keeps the executables.
+clean:
+ rm -f *.cmo *.cma *.cmi *.cmxa *.a *.o
+
+# Additionally removes editor backups and the executables.
+distclean: clean
+ rm -f *~ print sort delcol
+
+# There are no subdirectories, so CLEAN is the same as clean.
+CLEAN: clean
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/README b/helm/DEVEL/pxp/pxp/examples/simple_transformation/README
new file mode 100644
index 000000000..5b9212862
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/README
@@ -0,0 +1,17 @@
+Usage:
+ sort -by phone
+ match n # node_type with
+ T_element name when name = col ->
+ raise Skip
+ | _ -> n # orphaned_flat_clone)
+ tree
+;;
+
+
+(* Entry point of delcol: reads a record list from stdin, removes the
+ * column selected with -col, and writes the result to stdout.
+ * Exits with code 1 if the column is missing or unknown, or if parsing
+ * or writing raises an exception.
+ *)
+let main() =
+  let column = ref "" in
+  Arg.parse
+      [ "-col", Arg.String (fun s -> column := s),
+             " (last-name|first-name|phone)";
+      ]
+      (fun _ -> raise (Arg.Bad "Bad usage"))
+      (* this program is delcol, not sort *)
+      "usage: delcol [ options ]";
+  if !column = "" then (
+    prerr_endline "Column not specified!";
+    exit 1;
+  );
+  if not(List.mem !column ["last-name"; "first-name"; "phone"]) then (
+    prerr_endline ("Unknown column: " ^ !column);
+    exit 1
+  );
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let tree =
+      parse_content_entity default_config (from_channel stdin) dtd default_spec
+    in
+    print_endline "";
+    (delcol !column tree) # write (Out_channel stdout) `Enc_iso88591
+  with
+      x ->
+        prerr_endline(string_of_exn x);
+        exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/24 09:42:52 gerd
+ * Updated a comment.
+ *
+ * Revision 1.1 2000/08/24 09:39:59 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml
new file mode 100644
index 000000000..56f5fb69b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml
@@ -0,0 +1,60 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list structure and print it *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+(* Walks over the tree and prints the data of the "last-name",
+ * "first-name" and "phone" elements, each with its caption. An empty
+ * line is printed after every "record" element.
+ *)
+let print tree =
+  (* element name -> caption printed in front of the element's data *)
+  let captions =
+    [ "last-name",  "Last name: ";
+      "first-name", "First name: ";
+      "phone",      "Telephone number: ";
+    ]
+  in
+  iter_tree
+    ~pre:
+      (fun n ->
+         match n # node_type with
+           T_element name when List.mem_assoc name captions ->
+             print_endline (List.assoc name captions ^ n # data)
+         | _ ->
+             ())
+    ~post:
+      (fun n ->
+         if n # node_type = T_element "record" then print_newline())
+    tree
+;;
+
+(* Entry point of print: parses "record.dtd", reads the record list from
+ * stdin and prints it. Any exception is reported on stderr and leads to
+ * exit code 1.
+ *)
+let main() =
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let input = from_channel stdin in
+    print (parse_content_entity default_config input dtd default_spec)
+  with
+      x ->
+        prerr_endline(string_of_exn x);
+        exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/08/22 21:57:43 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd b/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd
new file mode 100644
index 000000000..b054ccd29
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml
new file mode 100644
index 000000000..00d36b09b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml
@@ -0,0 +1,18 @@
+
+
+
+ Stolpmann
+ Gerd
+ 997705
+
+
+ Smith
+ Jack
+ 12345
+
+
+ Ützgür
+ xxx
+ 7654
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml
new file mode 100644
index 000000000..297730f66
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml
@@ -0,0 +1,83 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list, sort it, and print it as XML *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+(* Returns a copy of 'tree' in which the sub nodes of every "record-list"
+ * element are sorted by the data of their 'by' element. Records without
+ * such an element are sorted as if the data were the empty string.
+ *)
+let sort by tree =
+  (* the sort key of one record *)
+  let key_of record =
+    try (find_element by record) # data
+    with Not_found -> ""
+  in
+  let by_key a b =
+    let a_string = key_of a in
+    let b_string = key_of b in
+    Pervasives.compare a_string b_string
+  in
+  map_tree
+    ~pre:
+      (fun n -> n # orphaned_flat_clone)
+    ~post:
+      (fun n ->
+         match n # node_type with
+           T_element "record-list" ->
+             let records = n # sub_nodes in
+             n # set_nodes (List.sort by_key records);
+             n
+         | _ ->
+             n)
+    tree
+;;
+
+
+(* Entry point of sort: reads a record list from stdin, sorts it by the
+ * criterion given with -by (default: "last-name") and writes the result
+ * to stdout. Exits with code 1 on an unknown criterion or on any
+ * exception.
+ *)
+let main() =
+  let criterion = ref "last-name" in
+  let known_criteria = ["last-name"; "first-name"; "phone"] in
+  Arg.parse
+      [ "-by", Arg.String (fun s -> criterion := s),
+            " (last-name|first-name|phone)";
+      ]
+      (fun _ -> raise (Arg.Bad "Bad usage"))
+      "usage: sort [ options ]";
+  if not (List.mem !criterion known_criteria) then begin
+    prerr_endline ("Unknown criterion: " ^ !criterion);
+    exit 1
+  end;
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let input = from_channel stdin in
+    let tree = parse_content_entity default_config input dtd default_spec in
+    print_endline "";
+    (sort !criterion tree) # write (Out_channel stdout) `Enc_iso88591
+  with
+      x ->
+        prerr_endline(string_of_exn x);
+        exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/30 16:05:44 gerd
+ * Minor update
+ *
+ * Revision 1.2 2000/08/24 09:40:11 gerd
+ * Allow that columns are missing.
+ *
+ * Revision 1.1 2000/08/22 21:57:44 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore b/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore
new file mode 100644
index 000000000..e125622dd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore
@@ -0,0 +1,13 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/validate/Makefile b/helm/DEVEL/pxp/pxp/examples/validate/Makefile
new file mode 100644
index 000000000..64b691887
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/validate/Makefile
@@ -0,0 +1,28 @@
+# make pxpvalidate: make bytecode executable
+# make pxpvalidate.opt: make native executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+# Bytecode executable, built directly from the single source file.
+pxpvalidate: validate.ml
+ ocamlfind ocamlc -o pxpvalidate -package "pxp" -linkpkg validate.ml
+
+# Native-code executable.
+pxpvalidate.opt: validate.ml
+ ocamlfind ocamlopt -o pxpvalidate.opt -package "pxp" -linkpkg validate.ml
+
+#----------------------------------------------------------------------
+# 'all' has neither prerequisites nor commands, so it builds nothing.
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+# No subdirectories here: CLEAN only runs clean.
+.PHONY: CLEAN
+CLEAN: clean
+
+# Additionally removes editor backups and both executables.
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f pxpvalidate pxpvalidate.opt
diff --git a/helm/DEVEL/pxp/pxp/examples/validate/validate.ml b/helm/DEVEL/pxp/pxp/examples/validate/validate.ml
new file mode 100644
index 000000000..3bb83d2d1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/validate/validate.ml
@@ -0,0 +1,126 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+(* Prints the text that string_of_exn produces for 'e' on stdout. *)
+let print_error e =
+ print_endline (string_of_exn e)
+;;
+
+(* Warning collector passed to the parser configuration: every warning is
+ * printed on stdout with the prefix "WARNING: ".
+ *)
+class warner =
+ object
+ method warn w =
+ print_endline ("WARNING: " ^ w)
+ end
+;;
+
+(* parse debug wf iso88591 filename:
+ * Parses the file 'filename' and discards the resulting document.
+ * - debug: value for the debugging_mode of the parser configuration
+ * - wf: if true, the well-formedness parser is used; otherwise the
+ * validating parser with a fresh hash_index as ID index
+ * - iso88591: selects `Enc_iso88591 instead of `Enc_utf8 as encoding
+ * Any exception is printed with print_error and recorded in
+ * 'error_happened'; it is not re-raised.
+ *)
+let parse debug wf iso88591 filename =
+ try
+ (* Parse the document: *)
+ let parse_fn =
+ if wf then parse_wfdocument_entity
+ else
+ let index = new hash_index in
+ parse_document_entity
+ ?transform_dtd:None
+ ~id_index:(index :> 'ext index)
+ in
+ let doc =
+ parse_fn
+ { default_config with
+ debugging_mode = debug;
+ encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+ idref_pass = true;
+ warner = new warner
+ }
+ (from_file filename)
+ default_spec
+ in
+ (* only the parser's side effects (errors, warnings) are of interest *)
+ ()
+ with
+ e ->
+ (* Print error; remember that there was an error *)
+ error_happened := true;
+ print_error e
+;;
+
+
+(* Parses the command line and runs 'parse' on every file argument, in
+ * the order given on the command line.
+ *)
+let main() =
+ let debug = ref false in
+ let wf = ref false in
+ let iso88591 = ref false in
+ let files = ref [] in
+ Arg.parse
+ [ "-d", Arg.Set debug,
+ " turn debugging mode on";
+ "-wf", Arg.Set wf,
+ " check only on well-formedness";
+ "-iso-8859-1", Arg.Set iso88591,
+ " use ISO-8859-1 as internal encoding instead of UTF-8";
+ ]
+ (fun x -> files := x :: !files)
+ "
+usage: pxpvalidate [options] file ...
+
+- checks the validity of XML documents. See below for list of options.
+
+PXP - The XML parser for Objective Caml
+
+List of options:";
+ (* the anonymous arguments were collected in reverse order *)
+ files := List.rev !files;
+ List.iter (parse !debug !wf !iso88591) !files;
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.10 2000/08/30 15:58:41 gerd
+ * Updated.
+ *
+ * Revision 1.9 2000/07/14 14:57:30 gerd
+ * Updated: warner
+ *
+ * Revision 1.8 2000/07/14 14:13:15 gerd
+ * Cosmetic changes.
+ *
+ * Revision 1.7 2000/07/14 14:11:06 gerd
+ * Updated because of changes of the PXP API.
+ *
+ * Revision 1.6 2000/07/08 21:53:00 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.5 2000/06/04 20:21:55 gerd
+ * Updated to new module names.
+ *
+ * Revision 1.4 2000/05/01 16:44:57 gerd
+ * Added check for ID uniqueness.
+ * Using new error formatter.
+ *
+ * Revision 1.3 1999/11/09 22:27:30 gerd
+ * The programs returns now an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2 1999/09/01 23:09:56 gerd
+ * Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1 1999/08/14 22:20:53 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore b/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore
new file mode 100644
index 000000000..e125622dd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore
@@ -0,0 +1,13 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile
new file mode 100644
index 000000000..5a0ba32b3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile
@@ -0,0 +1,33 @@
+# make xmlforms: make bytecode executable
+# make xmlforms.opt: make native executable
+# make clean: remove intermediate files
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+#----------------------------------------------------------------------
+
+# The real build rules are in Makefile.code; the targets here only
+# delegate to it.
+.PHONY: xmlforms
+xmlforms:
+ $(MAKE) -f Makefile.code xmlforms
+
+.PHONY: xmlforms.opt
+xmlforms.opt:
+ $(MAKE) -f Makefile.code xmlforms.opt
+
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+# CLEAN and distclean also descend into the styles subdirectory.
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C styles CLEAN
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+ rm -f xmlforms xmlforms.opt
+ $(MAKE) -C styles distclean
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code
new file mode 100644
index 000000000..f99674042
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code
@@ -0,0 +1,57 @@
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+# Modules that go into the archive; ds_app is linked separately because
+# it contains the main program.
+OBJECTS = ds_context.cmo ds_style.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = xmlforms.cma
+XARCHIVE = xmlforms.cmxa
+NAME = xmlforms
+REQUIRES = camltk str pxp
+
+# Bytecode executable.
+xmlforms: $(ARCHIVE) ds_app.cmo
+ ocamlfind ocamlc -g -o xmlforms -custom -package "$(REQUIRES)" \
+ -linkpkg $(ARCHIVE) ds_app.cmo
+
+# Native executable. The target must be named like the file it creates
+# and like the target the wrapper Makefile invokes (xmlforms.opt).
+xmlforms.opt: $(XARCHIVE) ds_app.cmx
+ ocamlfind ocamlopt -o xmlforms.opt -custom -package "$(REQUIRES)" \
+ -linkpkg $(XARCHIVE) ds_app.cmx
+
+$(ARCHIVE): $(OBJECTS)
+ $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+ $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+# Compiler commands. ROPTIONS is not set in this file; presumably it is
+# defined by the included file depend.pkg (see the rule below) - confirm.
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+# Module dependencies, regenerated whenever a source file changes.
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+# Package options as reported by ocamlfind for the REQUIRES list.
+depend.pkg: Makefile
+ $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+# Empty rule: interface files are sources and are never rebuilt.
+*.mli:
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/README b/helm/DEVEL/pxp/pxp/examples/xmlforms/README
new file mode 100644
index 000000000..806a4094a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/README
@@ -0,0 +1,61 @@
+-----------------------------------------------------------------------------
+xmlforms
+-----------------------------------------------------------------------------
+
+THE IDEA:
+
+This example uses XML for two purposes:
+
+- The "story" and layout of the application is specified in XML
+- The data records are stored in XML
+
+An "application" is a set of "masks" or sequences of masks, and every mask
+is thought of as a visible page of the application, containing layout
+elements and functional elements. Layout is specified in TeX-style using
+hboxes, vboxes, hspaces, vspaces. Functional elements are "entries" (input
+box for a string with one line), "textboxes" (input boxes with several
+lines), and buttons.
+
+See styles/ds-style.dtd for the DTD of an application specification, and
+the other xml files in this directory for examples.
+
+The entries and textboxes are bound to "slots", i.e. string variables. If
+the application is started, the slots are read from a file, and if the
+user presses a special "save" button, the slots are stored into this file.
+The format of this data file is again XML; the simplistic DTD can be found
+in styles/ds-object.dtd.
+
+
+THE IMPLEMENTATION:
+
+There is currently a mapping of the specifications to ocamltk, done by a
+program called "xmlforms".
+
+
+HOW TO COMPILE:
+
+It is assumed that "findlib" is present on your system; see ABOUT-FINDLIB
+in the toplevel directory.
+The "pxp" package (formerly called "markup") must have been installed.
+
+- "make xmlforms" produces a bytecode executable "xmlforms"
+- "make xmlforms.opt" produces a native executable "xmlforms.opt"
+
+Note that you cannot start the executables directly; see the next section.
+
+
+HOW TO START AN APPLICATION:
+
+As "xmlforms" is a generic executable, there is a simple mechanism to bind
+it to a specific instance of an application. For example, in the "styles"
+subdirectory there is the application specification "crazy-style.xml". To
+start it, make a symlink called "crazy" referring to the "xmlforms"
+binary, set the environment variable DATASHEETS to the directory where the
+DTDs and XML files can be found, and start "crazy":
+
+ ln -s ../xmlforms crazy
+ DATASHEETS=. crazy my-record.xml
+
+(If you do not set DATASHEETS, a default directory, normally
+"/opt/xmlforms/lib" is used.)
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml
new file mode 100644
index 000000000..55589ea59
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml
@@ -0,0 +1,107 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Tk
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+open Ds_context
+open Ds_style
+
+
+(* Directory containing the DTDs and style files: the value of the
+ * environment variable DATASHEETS, or "/opt/xmlforms/lib" if it is unset.
+ *)
+let installdir =
+ try Sys.getenv "DATASHEETS" with
+ Not_found -> "/opt/xmlforms/lib"
+(* System ID of the style definition; set by 'main' before 'edit' runs *)
+let style_sysid = ref ""
+(* The DTD of the data records and the name of its root element *)
+let object_dtd_sysid = Filename.concat installdir "ds-object.dtd"
+let object_dtd_root = "record"
+
+
+(* Prints the text that string_of_exn produces for 'e' on stdout.
+ * The function does not call itself, so it is not declared 'rec'.
+ *)
+let print_error e =
+  print_endline (string_of_exn e)
+;;
+
+
+(* run f arg1 arg2: evaluates 'f arg1 arg2'; any exception is printed
+ * with print_error instead of being propagated.
+ *)
+let run f arg1 arg2 =
+ try f arg1 arg2 with
+ e -> print_error e
+;;
+
+
+(* edit filename cmd:
+ * Parses the style definition named by !style_sysid and the object DTD,
+ * opens the Tk toplevel window (titled 'cmd'), creates the context for
+ * the data file 'filename', shows the start mask, and enters the Tk
+ * main loop.
+ *)
+let edit filename cmd =
+ (* read in style definition *)
+ let index = new hash_index in
+ let style =
+ parse_document_entity
+ ~id_index:(index :> 'ext index)
+ default_config
+ (from_file !style_sysid)
+ tag_map
+ in
+ let root = style # root in
+ root # extension # prepare (index :> 'ext index);
+
+ (* read in the DTD of the data records and fix its root element *)
+ let obj_dtd =
+ parse_dtd_entity
+ default_config
+ (from_file object_dtd_sysid)
+ in
+ obj_dtd # set_root object_dtd_root;
+
+ let topframe = openTk() in
+ let context = new context filename obj_dtd index root topframe in
+
+ (* fixed window size: propagation of the widgets' size is switched off *)
+ Toplevel.configure topframe [ Width (Centimeters 20.0);
+ Height (Centimeters 12.0);
+ ];
+ Pack.propagate_set topframe false;
+ Wm.title_set topframe cmd;
+ context # goto (root # extension # start_node_name);
+ mainLoop()
+;;
+
+
+(* Entry point: the base name of the executable selects the style file
+ * "<name>-style.xml" in 'installdir'; the only argument is the data file.
+ * With any other number of arguments a usage message is printed and the
+ * program exits with code 1.
+ *)
+let main() =
+  let cmd = Filename.basename Sys.argv.(0) in
+  if Array.length Sys.argv = 2 then begin
+    let filename = Sys.argv.(1) in
+    style_sysid := Filename.concat installdir (cmd ^ "-style.xml");
+    run edit filename cmd
+  end
+  else begin
+    prerr_endline ("usage: " ^ cmd ^ " filename");
+    exit(1)
+  end
+;;
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.5 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.4 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.3 2000/05/01 16:48:45 gerd
+ * Using the new error formatter.
+ *
+ * Revision 1.2 1999/12/17 21:34:29 gerd
+ * The name of the root element is set to "record" in the
+ * object_dtd; otherwise the parser would not check that the root
+ * element is the right element.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml
new file mode 100644
index 000000000..453ca00f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml
@@ -0,0 +1,238 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+
+(* Exemplar nodes: the context class below only uses them to create new
+ * element and data nodes (via create_element / create_data).
+ *)
+let empty_record = new element_impl (Pxp_yacc.default_extension);;
+let empty_dnode = new data_impl Pxp_yacc.default_extension;;
+
+(* The runtime context of an application: it holds the data record
+ * (a tree of "string" slots under the element 'obj'), the history of
+ * visited masks, and the widget of the mask currently displayed.
+ *
+ * the_filename: file the record is loaded from and saved to
+ * the_obj_dtd:  DTD of the record
+ * the_index:    index used to look up masks by name
+ * the_root:     root node of the style definition
+ * the_topframe: Tk widget into which the masks are packed
+ *)
+class context the_filename the_obj_dtd the_index the_root the_topframe =
+  object (self)
+    val filename = the_filename
+    val obj_dtd = the_obj_dtd
+    val node_index = the_index
+    val mutable obj = empty_record # create_element
+                        the_obj_dtd (T_element "record") []
+    val root = the_root
+    val topframe = the_topframe
+    val mutable wdg = None
+
+    val mutable history = ( [| |] : string array )
+    val mutable index = 0
+
+    initializer
+      self # load_obj
+
+    method obj = obj
+
+    (* history *)
+
+    (* Destroys the widget of the current mask, if any. *)
+    method private leave_node =
+      begin match wdg with
+          None -> ()
+        | Some w -> Tk.destroy w
+      end;
+      wdg <- None
+
+    (* Creates and packs the widget of the mask at history.(index).
+     * Fails with "Mask not found: ..." if the index has no such mask.
+     *)
+    method private enter_node =
+      let where = history.(index) in
+      let n =
+        try node_index # find where with
+            Not_found -> failwith ("Mask not found: " ^ where) in
+      let w = n # extension # create_widget topframe self in
+      Tk.pack [w] (n # extension # pack_opts @ [ Tk.Expand true] );
+      wdg <- Some w
+
+
+    (* Goes one step back in the history; raises Not_found at its start. *)
+    method previous =
+      if index > 0 then
+        index <- index - 1
+      else
+        raise Not_found;
+      self # leave_node;
+      self # enter_node
+
+
+    (* Goes one step forward in the history; raises Not_found at its end. *)
+    method next =
+      if index < Array.length history - 1 then
+        index <- index + 1
+      else
+        raise Not_found;
+      self # leave_node;
+      self # enter_node
+
+
+    (* Displays the mask 'where'. The history entries after the current
+     * position are dropped, and 'where' becomes the last entry.
+     *)
+    method goto where =
+      assert (index <= Array.length history);
+      self # leave_node;
+      let persisting_history =
+        if index < Array.length history then
+          Array.sub history 0 (index+1)
+        else
+          history
+      in
+      history <- Array.concat [ persisting_history; [| where |] ];
+      index <- Array.length history - 1;
+      self # enter_node
+
+
+    (* The name of the current mask; raises Not_found if there is none. *)
+    method current =
+      if index < Array.length history then
+        history.(index)
+      else
+        raise Not_found
+
+
+    (* read, write the slots of object *)
+
+    (* Returns the first "string" element below 'obj' (depth-first) whose
+     * "name" attribute is 'name'; raises Not_found if there is none.
+     *)
+    method search_slot name =
+      let rec search n =
+        match n # node_type with
+            T_element "string" ->
+              if n # required_string_attribute "name" = name then
+                n
+              else raise Not_found
+          | T_element _ ->
+              search_list (n # sub_nodes)
+          | T_data ->
+              raise Not_found
+          | _ ->
+              assert false
+
+      and search_list l =
+        match l with
+            x :: l' ->
+              (try search x with Not_found -> search_list l')
+          | [] ->
+              raise Not_found
+      in
+      search obj
+
+    (* The data of slot 'name'; raises Not_found if the slot is missing. *)
+    method get_slot name =
+      let d = (self # search_slot name) # data in
+      d
+
+    (* Sets slot 'name' to 'value': an existing slot element is deleted,
+     * and a new "string" element is appended to 'obj'.
+     *)
+    method set_slot name value =
+      let dtd = obj # dtd in
+      begin try
+        let n = self # search_slot name in
+        n # delete
+      with
+          Not_found -> ()
+      end;
+      let e_string = empty_record # create_element dtd (T_element "string")
+                       [ "name", name ] in
+      let dnode = empty_dnode # create_data dtd value in
+      e_string # add_node dnode;
+      e_string # local_validate();
+      obj # add_node e_string;
+      assert(self # get_slot name = value)
+
+    (* load, save object *)
+
+
+    (* Replaces 'obj' by the parsed contents of 'filename' if that file
+     * exists; otherwise 'obj' is left as it is and "New file!" is printed.
+     *)
+    method load_obj =
+      if Sys.file_exists filename then begin
+        obj <- parse_content_entity
+                 default_config
+                 (from_file filename)
+                 obj_dtd
+                 default_spec
+      end
+      else begin
+        print_string "New file!\n";
+        flush stdout
+      end
+
+
+    (* Writes 'obj' as XML text to 'filename'. The channel is closed in
+     * any case; exceptions raised while writing are re-raised.
+     *)
+    method save_obj =
+      let fd = open_out filename in
+      try
+
+        let re1 = Str.regexp "&" in
+        let re2 = Str.regexp "<" in
+        let re3 = Str.regexp "'" in
+        let re4 = Str.regexp ">" in
+        (* Replaces the characters that must not occur literally in
+         * character data or in attribute values quoted with '...' by
+         * entity references. "&" has to be replaced first, otherwise the
+         * ampersands of the other references would be escaped again.
+         *)
+        let protect s =
+          let s1 = Str.global_replace re1 "&amp;" s in
+          let s2 = Str.global_replace re2 "&lt;" s1 in
+          let s3 = Str.global_replace re3 "&apos;" s2 in
+          let s4 = Str.global_replace re4 "&gt;" s3 in
+          s4
+        in
+
+        let rec iterate (n : 'node extension node as 'node) =
+          match n # node_type with
+              T_data ->
+                output_string fd (protect (n # data))
+            | T_element name ->
+                (* Line breaks are only written inside of tags, where
+                 * they are not part of the content.
+                 *)
+                output_string fd ("<" ^ name ^ "\n");
+                let anames = n # attribute_names in
+                List.iter
+                  (fun aname ->
+                     let aval = n # attribute aname in
+                     let v =
+                       match aval with
+                           Value s ->
+                             aname ^ "='" ^ protect s ^ "'\n"
+                         | Valuelist l ->
+                             aname ^ "='" ^ String.concat " " (List.map protect l) ^ "'\n"
+                         | Implied_value ->
+                             ""
+                     in
+                     output_string fd v)
+                  anames;
+                output_string fd ">";
+                List.iter iterate (n # sub_nodes);
+                (* end tag: "</name" + linefeed + ">" *)
+                output_string fd ("</" ^ name ^ "\n>")
+            | _ ->
+                assert false
+        in
+
+        (* NOTE(review): only a linefeed is written before the root
+         * element. If the record can contain non-ASCII characters, an
+         * XML declaration naming the encoding probably belongs here -
+         * confirm against the parser configuration used by load_obj.
+         *)
+        output_string fd "\n";
+        iterate obj;
+        close_out fd
+      with
+          e ->
+            close_out fd;
+            raise e
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/30 15:58:49 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/07/23 20:25:05 gerd
+ * Update because of API change: local_validate.
+ *
+ * Revision 1.5 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.4 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.3 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/30 00:09:08 gerd
+ * Minor fix.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml
new file mode 100644
index 000000000..08d0daa03
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml
@@ -0,0 +1,778 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Ds_context
+
+
+(* [get_dimension s] converts a length specification such as "12px",
+ * "2.5cm" or "10 pt" into the corresponding Tk unit value.
+ * The string must begin with a decimal number (an optional integer part
+ * and an optional fraction), optionally followed by whitespace, followed
+ * by one of the units px, cm, in, mm, pt. Pixel values are truncated to
+ * an integer.
+ * Fails with "Bad dimension: ..." if the string does not have this
+ * form, including the case that the number is missing.
+ *)
+let get_dimension s =
+  (* The decimal point is escaped: an unescaped "." would accept any
+   * character at this position.
+   *)
+  let re =
+    Str.regexp
+      "\\([0-9]*\\(\\.[0-9]+\\)?\\)[ \t\n]*\\(px\\|cm\\|in\\|mm\\|pt\\)" in
+  if Str.string_match re s 0 then begin
+    let number = Str.matched_group 1 s in
+    let dim = Str.matched_group 3 s in
+    (* The pattern also matches an empty number; report this like any
+     * other malformed dimension
+     *)
+    let value =
+      try float_of_string number
+      with Failure _ -> failwith ("Bad dimension: " ^ s) in
+    match dim with
+        "px" -> Tk.Pixels (int_of_float value)
+      | "cm" -> Tk.Centimeters value
+      | "in" -> Tk.Inches value
+      | "mm" -> Tk.Millimeters value
+      | "pt" -> Tk.PrinterPoint value
+      | _ -> assert false
+  end
+  else
+    failwith ("Bad dimension: " ^ s)
+;;
+
+
+(* Base class of the node extensions defined in this file. It keeps the
+ * reference to the node the extension is attached to, resolves the
+ * color and font settings (a local value wins, otherwise the parent
+ * node is asked), and declares the methods used to build the widget
+ * tree: prepare, create_widget, pack_opts, xstretchable, ystretchable,
+ * accept.
+ *)
+class virtual shared =
+ object(self)
+
+ (* --- default_ext --- *)
+
+ (* The node this extension is attached to; None until set_node is
+ * called
+ *)
+ val mutable node = (None : shared node option)
+
+ method clone = {< >}
+ method node =
+ (* Calling this method before set_node is an assertion failure *)
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- shared attributes: color & font settings --- *)
+
+ (* Local settings; None means "not set on this node". They are filled
+ * in by init_color_and_font.
+ *)
+ val mutable fgcolor = (None : string option)
+ val mutable bgcolor = (None : string option)
+ val mutable font = (None : string option)
+
+ method fgcolor =
+ (* Get the foreground color: If there is a local value, return it;
+ * otherwise ask parent node. A Not_found raised while asking the
+ * parent is reported as Failure "#fgcolor".
+ *)
+ match fgcolor with
+ Some c -> c
+ | None -> try self # node # parent # extension # fgcolor with
+ Not_found -> failwith "#fgcolor"
+
+ method bgcolor =
+ (* Get the background color: If there is a local value, return it;
+ * otherwise ask parent node. A Not_found raised while asking the
+ * parent is reported as Failure "#bgcolor".
+ *)
+ match bgcolor with
+ Some c -> c
+ | None -> try self # node # parent # extension # bgcolor with
+ Not_found -> failwith "#bgcolor"
+
+ method font =
+ (* Get the current font: If there is a local value, return it;
+ * otherwise ask parent node. A Not_found raised while asking the
+ * parent is reported as Failure "#font".
+ *)
+ match font with
+ Some c -> c
+ | None -> try self # node # parent # extension # font with
+ Not_found -> failwith "#font"
+
+ method private init_color_and_font =
+ (* Read the attributes "fgcolor", "bgcolor" and "font" of the node
+ * into the local settings. An implied attribute, or a Not_found
+ * raised by the attribute lookup, yields None.
+ *)
+ let get_color n =
+ try
+ match self # node # attribute n with
+ Value v -> Some v
+ | Implied_value -> None
+ | _ -> assert false
+ with Not_found -> None in
+ fgcolor <- get_color "fgcolor";
+ bgcolor <- get_color "bgcolor";
+ font <- get_color "font"; (* sic! *)
+
+
+ (* The effective settings as Tk option lists, ready to be appended to
+ * the options of a widget
+ *)
+ method private bg_color_opt =
+ [ Tk.Background (Tk.NamedColor (self # bgcolor)) ]
+
+ method private fg_color_opt =
+ [ Tk.Foreground (Tk.NamedColor (self # fgcolor)) ]
+
+ method private font_opt =
+ [ Tk.Font (self # font) ]
+
+ (* --- virtual --- *)
+
+ method virtual prepare : shared Pxp_yacc.index -> unit
+ method virtual create_widget : Widget.widget -> context -> Widget.widget
+
+ (* Defaults: no extra pack options, not stretchable, and accept does
+ * nothing
+ *)
+ method pack_opts = ( [] : Tk.options list )
+ method xstretchable = false
+ method ystretchable = false
+
+ method accept (c:context) = ()
+
+ method private get_mask =
+ (* find parent which is a mask: walk up the parent chain, starting
+ * with the own node, and return the extension of the first "mask"
+ * element. Only element nodes are expected on the way.
+ *)
+ let rec search n =
+ match n # node_type with
+ T_element "mask" ->
+ n # extension
+ | T_element _ ->
+ search (n # parent)
+ | _ ->
+ assert false
+ in
+ search (self # node)
+
+
+ method private accept_mask (c:context) =
+ (* Call accept for the node of the enclosing mask and for all nodes
+ * below it, parents before their sub nodes
+ *)
+ let rec iterate n =
+ n # extension # accept c;
+ List.iter iterate (n # sub_nodes)
+ in
+ iterate (self # get_mask # node)
+
+
+ (* Default: always fails with "#start_node_name" *)
+ method start_node_name =
+ (failwith "#start_node_name" : string)
+
+ (* --- debug --- *)
+
+ method private name =
+ (* The element name of the node, or "#PCDATA" for data nodes *)
+ let nt = self # node # node_type in
+ match nt with
+ T_element n -> n
+ | T_data -> "#PCDATA"
+ | _ -> assert false
+
+ end
+;;
+
+
+(* Extension without a widget of its own: preparing it only reads the
+ * color and font attributes, and asking it for a widget is an error.
+ *)
+class default =
+  object (self)
+    inherit shared
+
+    method prepare _idx = self # init_color_and_font
+
+    method create_widget _w _c = failwith "default # create_widget"
+  end
+;;
+
+
+(* Placeholder element node; used below as the initial value of
+ * application.start_node
+ *)
+let dummy_node = new element_impl (new default);;
+
+(* Extension for "application" elements. It supplies fallback values for
+ * the color and font settings, resolves the node named by the "start"
+ * attribute, prepares all nodes below it, and delegates the widget
+ * creation to the start node.
+ *)
+class application =
+  object (self)
+    inherit shared
+
+    (* The node found for the "start" attribute; a placeholder until
+     * prepare has been called
+     *)
+    val mutable start_node = dummy_node
+
+    method prepare idx =
+      self # init_color_and_font;
+      (* Settings missing on this node get fixed fallback values *)
+      (match fgcolor with None -> fgcolor <- Some "black" | Some _ -> ());
+      (match bgcolor with None -> bgcolor <- Some "white" | Some _ -> ());
+      (match font with None -> font <- Some "fixed" | Some _ -> ());
+      (* Look up the start node in the index *)
+      let start_name =
+        match self # node # attribute "start" with
+            Value v -> v
+          | _ -> assert false in
+      let target =
+        try idx # find start_name
+        with Not_found -> failwith "Start node not found" in
+      start_node <- target;
+      (* Prepare the whole subtree, every node before its sub nodes *)
+      let rec prepare_subtree n =
+        n # extension # prepare idx;
+        List.iter prepare_subtree (n # sub_nodes)
+      in
+      List.iter prepare_subtree (self # node # sub_nodes)
+
+
+    (* The value of the "start" attribute *)
+    method start_node_name =
+      match self # node # attribute "start" with
+          Value v -> v
+        | _ -> assert false
+
+    (* Widget and pack options are those of the start node *)
+    method create_widget w c =
+      let ext = start_node # extension in
+      ext # create_widget w c
+
+    method pack_opts =
+      let ext = start_node # extension in
+      ext # pack_opts
+  end
+;;
+
+
+(* Extension for "sequence" elements: the widget and the pack options
+ * are taken from the first sub node. List.hd fails if there is no sub
+ * node.
+ *)
+class sequence =
+  object (self)
+    inherit shared
+
+    method prepare _idx = self # init_color_and_font
+
+    method create_widget w c =
+      let first = List.hd (self # node # sub_nodes) in
+      let ext = first # extension in
+      ext # create_widget w c
+
+    method pack_opts =
+      let first = List.hd (self # node # sub_nodes) in
+      let ext = first # extension in
+      ext # pack_opts
+  end
+;;
+
+
+(* Extension for "vbox" elements: a frame into which the widgets of the
+ * sub nodes are packed one after the other, anchored according to the
+ * "halign" attribute ("left", "right" or "center").
+ *)
+class vbox =
+  object (self)
+    inherit shared
+
+    val mutable att_halign = "left"
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_halign <-
+        (match self # node # attribute "halign" with
+             Value v -> v
+           | _ -> assert false)
+
+    method create_widget w c =
+      let frame = Frame.create w (self # bg_color_opt) in
+      let children = self # node # sub_nodes in
+      let anchor =
+        match att_halign with
+            "center" -> [ Tk.Anchor Tk.Center ]
+          | "right" -> [ Tk.Anchor Tk.E ]
+          | "left" -> [ Tk.Anchor Tk.W ]
+          | _ -> assert false
+      in
+      (* The pack options of a sub node are requested before its widget
+       * is created
+       *)
+      let pack_child child =
+        let child_opts = child # extension # pack_opts in
+        let widget = child # extension # create_widget frame c in
+        Tk.pack [widget] (anchor @ child_opts)
+      in
+      List.iter pack_child children;
+      frame
+
+    (* The box fills the directions in which it is stretchable *)
+    method pack_opts =
+      match self # xstretchable, self # ystretchable with
+          false, false -> []
+        | true, true -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ]
+        | false, true -> [ Tk.Fill Tk.Fill_Y; (* Tk.Expand true *) ]
+        | true, false -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ]
+
+    (* The box is stretchable if at least one of the sub nodes is *)
+    method xstretchable =
+      List.exists
+        (fun child -> child # extension # xstretchable)
+        (self # node # sub_nodes)
+
+    method ystretchable =
+      List.exists
+        (fun child -> child # extension # ystretchable)
+        (self # node # sub_nodes)
+
+  end
+
+;;
+
+
+(* Extension for "mask" elements: behaves like a vbox, but the
+ * horizontal alignment is always "left" and no "halign" attribute is
+ * read.
+ *)
+class mask =
+  object (self)
+
+    inherit vbox
+
+    method prepare _idx =
+      self # init_color_and_font;
+      att_halign <- "left"
+  end
+;;
+
+
+(* Extension for "hbox" elements: the widgets of the sub nodes are
+ * packed from left to right into an inner frame. The inner frame is
+ * placed into an outer frame according to "halign"; the sub widgets are
+ * anchored according to "valign". If a "width" is given, an invisible
+ * canvas of this width is packed before the inner frame.
+ *)
+class hbox =
+  object (self)
+    inherit shared
+
+    val mutable att_width = None
+    val mutable att_halign = "left"
+    val mutable att_valign = "top"
+
+    method prepare idx =
+      self # init_color_and_font;
+      (* "halign" and "valign" must have values; "width" may be implied *)
+      let required_value name =
+        match self # node # attribute name with
+            Value v -> v
+          | _ -> assert false
+      in
+      att_halign <- required_value "halign";
+      att_valign <- required_value "valign";
+      att_width <-
+        (match self # node # attribute "width" with
+             Value v -> Some (get_dimension v)
+           | Implied_value -> None
+           | _ -> assert false)
+
+    method create_widget w c =
+      let outer = Frame.create w (self # bg_color_opt) in
+      let strut =
+        match att_width with
+            Some wd ->
+              let strut_opts =
+                [ Tk.Width wd; Tk.Height (Tk.Pixels 0);
+                  Tk.Relief Tk.Flat;
+                  Tk.HighlightThickness (Tk.Pixels 0);
+                ] @ self # bg_color_opt in
+              [ Canvas.create outer strut_opts ]
+          | None -> []
+      in
+      let inner = Frame.create outer (self # bg_color_opt) in
+      let children = self # node # sub_nodes in
+
+      let halign_opts =
+        match att_halign with
+            "center" -> [ Tk.Anchor Tk.Center ]
+          | "right" -> [ Tk.Anchor Tk.E ]
+          | "left" -> [ Tk.Anchor Tk.W ]
+          | _ -> assert false
+      in
+      let valign_opts =
+        match att_valign with
+            "center" -> [ Tk.Anchor Tk.Center ]
+          | "bottom" -> [ Tk.Anchor Tk.S ]
+          | "top" -> [ Tk.Anchor Tk.N ]
+          | _ -> assert false
+      in
+      let pack_child child =
+        let child_opts = child # extension # pack_opts in
+        let widget = child # extension # create_widget inner c in
+        Tk.pack [widget] (valign_opts @ (Tk.Side Tk.Side_Left :: child_opts))
+      in
+      List.iter pack_child children;
+      let fill_opts = self # pack_opts in
+      Tk.pack (strut @ [inner]) (halign_opts @ fill_opts);
+      outer
+
+    (* The box fills the directions in which it is stretchable *)
+    method pack_opts =
+      match self # xstretchable, self # ystretchable with
+          false, false -> []
+        | true, true -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ]
+        | false, true -> [ Tk.Fill Tk.Fill_Y; (* Tk.Expand true *) ]
+        | true, false -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ]
+
+    (* The box is stretchable if at least one of the sub nodes is *)
+    method xstretchable =
+      List.exists
+        (fun child -> child # extension # xstretchable)
+        (self # node # sub_nodes)
+
+    method ystretchable =
+      List.exists
+        (fun child -> child # extension # ystretchable)
+        (self # node # sub_nodes)
+
+  end
+;;
+
+(* Extension for "vspace" elements: a frame containing an invisible
+ * canvas of zero width and the height given by the "height" attribute.
+ * With fill="yes" the space is stretchable in the vertical direction.
+ *)
+class vspace =
+  object (self)
+    inherit shared
+
+    val mutable att_height = Tk.Pixels 0
+    val mutable att_fill = false
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_height <-
+        (match self # node # attribute "height" with
+             Value v -> get_dimension v
+           | _ -> assert false);
+      att_fill <-
+        (match self # node # attribute "fill" with
+             Value "yes" -> true
+           | Value "no" -> false
+           | _ -> assert false)
+
+
+    method create_widget w c =
+      let frame = Frame.create w ( self # bg_color_opt ) in
+      let strut_opts =
+        [ Tk.Height att_height; Tk.Width (Tk.Pixels 0);
+          Tk.Relief Tk.Flat;
+          Tk.HighlightThickness (Tk.Pixels 0);
+        ] @ self # bg_color_opt in
+      let strut = Canvas.create frame strut_opts in
+      let fill_opts =
+        if att_fill then [Tk.Fill Tk.Fill_Y; Tk.Expand true] else [] in
+      Tk.pack [strut] fill_opts;
+      frame
+
+    method pack_opts =
+      match att_fill with
+          true -> [ Tk.Fill Tk.Fill_Y; Tk.Expand true ]
+        | false -> []
+
+    method ystretchable = att_fill
+  end
+;;
+
+(* Extension for "hspace" elements: a frame containing an invisible
+ * canvas of zero height and the width given by the "width" attribute.
+ * With fill="yes" the space is stretchable in the horizontal direction.
+ *)
+class hspace =
+  object (self)
+    inherit shared
+
+
+    val mutable att_width = Tk.Pixels 0
+    val mutable att_fill = false
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_width <-
+        (match self # node # attribute "width" with
+             Value v -> get_dimension v
+           | _ -> assert false);
+      att_fill <-
+        (match self # node # attribute "fill" with
+             Value "yes" -> true
+           | Value "no" -> false
+           | _ -> assert false)
+
+
+    method create_widget w c =
+      let frame = Frame.create w ( self # bg_color_opt ) in
+      let strut_opts =
+        [ Tk.Width att_width; Tk.Height (Tk.Pixels 0);
+          Tk.Relief Tk.Flat;
+          Tk.HighlightThickness (Tk.Pixels 0);
+        ] @ self # bg_color_opt in
+      let strut = Canvas.create frame strut_opts in
+      let fill_opts =
+        if att_fill then [Tk.Fill Tk.Fill_X; Tk.Expand true] else [] in
+      Tk.pack [strut] fill_opts;
+      frame
+
+    method pack_opts =
+      match att_fill with
+          true -> [ Tk.Fill Tk.Fill_X; Tk.Expand true ]
+        | false -> []
+
+    method xstretchable = att_fill
+  end
+;;
+
+(* Extension for "label" elements: a Tk label showing the data of the
+ * node. "textwidth" (optional, an integer) sets the width of the label,
+ * "halign" ("left", "right" or "center") the anchor of the text.
+ *)
+class label =
+  object (self)
+    inherit shared
+
+    (* A negative width means that no width is set *)
+    val mutable att_textwidth = (-1)
+    val mutable att_halign = "left"
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_textwidth <-
+        (match self # node # attribute "textwidth" with
+             Value v ->
+               (* Only the conversion error is translated; any other
+                * exception is passed through unchanged
+                *)
+               (try int_of_string v
+                with Failure _ -> failwith ("Not an integer: " ^ v))
+           | Implied_value ->
+               (-1)
+           | _ -> assert false);
+      att_halign <-
+        (match self # node # attribute "halign" with
+             Value v -> v
+           | _ -> assert false)
+
+
+    method create_widget w c =
+      let opts_textwidth =
+        if att_textwidth < 0 then [] else [ Tk.TextWidth att_textwidth ] in
+      let opts_halign =
+        match att_halign with
+            "left" -> [ Tk.Anchor Tk.W ]
+          | "right" -> [ Tk.Anchor Tk.E ]
+          | "center" -> [ Tk.Anchor Tk.Center ]
+          | _ -> assert false
+      in
+      let opts_content =
+        [ Tk.Text (self # node # data) ] in
+      Label.create w (opts_textwidth @ opts_halign @
+                      opts_content @ self # bg_color_opt @
+                      self # fg_color_opt @ self # font_opt)
+
+  end
+;;
+
+(* Extension for "entry" elements: a one-line Tk entry bound to a text
+ * variable. The initial text is the value of the slot named by the
+ * "slot" attribute, or the data of the node if the slot lookup raises
+ * Not_found. accept stores the current text back into the slot.
+ *)
+class entry =
+  object (self)
+    inherit shared
+
+    (* The text variable is created lazily, i.e. not before the first
+     * use in create_widget or accept
+     *)
+    val mutable tv = lazy (Textvariable.create())
+    (* A negative width means that no width is set *)
+    val mutable att_textwidth = (-1)
+    val mutable att_slot = ""
+
+    method prepare idx =
+      self # init_color_and_font;
+      tv <- lazy (Textvariable.create());
+      att_textwidth <-
+        (match self # node # attribute "textwidth" with
+             Value v ->
+               (* Only the conversion error is translated; any other
+                * exception is passed through unchanged
+                *)
+               (try int_of_string v
+                with Failure _ -> failwith ("Not an integer: " ^ v))
+           | Implied_value ->
+               (-1)
+           | _ -> assert false);
+      att_slot <-
+        (match self # node # attribute "slot" with
+             Value v -> v
+           | _ -> assert false)
+
+    method create_widget w c =
+      let opts_textwidth =
+        if att_textwidth < 0 then [] else [ Tk.TextWidth att_textwidth ] in
+      let e = Entry.create w ( [ Tk.TextVariable (Lazy.force tv) ] @
+                               self # fg_color_opt @
+                               self # bg_color_opt @
+                               self # font_opt @
+                               opts_textwidth
+                             ) in
+      let s =
+        try c # get_slot att_slot with
+            Not_found -> self # node # data in
+      Textvariable.set (Lazy.force tv) s;
+      e
+
+    method accept c =
+      c # set_slot att_slot (Textvariable.get (Lazy.force tv))
+
+  end
+;;
+
+(* Extension for "textbox" elements: a multi-line Tk text widget with a
+ * vertical scrollbar. If a "slot" is given, the box shows the value of
+ * the slot (or the data of the node if the slot lookup raises
+ * Not_found) and accept stores the text back into the slot. Without a
+ * slot the box shows the data of the node and is disabled.
+ *)
+class textbox =
+  object (self)
+    inherit shared
+
+    (* Negative sizes mean that the size is not set *)
+    val mutable att_textwidth = (-1)
+    val mutable att_textheight = (-1)
+    (* The empty string means that there is no slot *)
+    val mutable att_slot = ""
+    (* The text widget created by the last call of create_widget *)
+    val mutable last_widget = None
+
+    method prepare idx =
+      self # init_color_and_font;
+      (* Value of an optional integer attribute; -1 if it is implied.
+       * Only the conversion error is translated; any other exception is
+       * passed through unchanged.
+       *)
+      let int_attribute name =
+        match self # node # attribute name with
+            Value v ->
+              (try int_of_string v
+               with Failure _ -> failwith ("Not an integer: " ^ v))
+          | Implied_value ->
+              (-1)
+          | _ -> assert false
+      in
+      att_textwidth <- int_attribute "textwidth";
+      att_textheight <- int_attribute "textheight";
+      att_slot <-
+        (match self # node # attribute "slot" with
+             Value v -> v
+           | Implied_value -> ""
+           | _ -> assert false)
+
+
+    method create_widget w c =
+      let opts_textwidth =
+        if att_textwidth < 0 then [] else [ Tk.TextWidth att_textwidth ] in
+      let opts_textheight =
+        if att_textheight < 0 then [] else [ Tk.TextHeight att_textheight ] in
+      let f = Frame.create w (self # bg_color_opt) in
+      let vscrbar = Scrollbar.create f [ Tk.Orient Tk.Vertical ] in
+      let e = Text.create f ( [ ] @
+                              self # fg_color_opt @
+                              self # bg_color_opt @
+                              self # font_opt @
+                              opts_textwidth @ opts_textheight
+                            ) in
+      last_widget <- Some e;
+      (* Couple the scrollbar and the text widget in both directions *)
+      Scrollbar.configure vscrbar [ Tk.ScrollCommand
+                                      (fun s -> Text.yview e s);
+                                    Tk.Width (Tk.Pixels 9) ];
+      Text.configure e [ Tk.YScrollCommand
+                           (fun a b -> Scrollbar.set vscrbar a b) ];
+      let s =
+        if att_slot <> "" then
+          try c # get_slot att_slot with
+              Not_found -> self # node # data
+        else
+          self # node # data
+      in
+      (* Text.insert appends always a newline to the last line; so strip
+       * an existing newline first
+       *)
+      let s' =
+        if s <> "" && s.[String.length s - 1] = '\n' then
+          String.sub s 0 (String.length s - 1)
+        else
+          s in
+      Text.insert e (Tk.TextIndex(Tk.End,[])) s' [];
+      if att_slot = "" then
+        Text.configure e [ Tk.State Tk.Disabled ];
+      Tk.pack [e] [ Tk.Side Tk.Side_Left ];
+      Tk.pack [vscrbar] [ Tk.Side Tk.Side_Left; Tk.Fill Tk.Fill_Y ];
+      f
+
+    method accept c =
+      (* Nothing to store without a slot or before the widget exists *)
+      if att_slot <> "" then
+        match last_widget with
+            None -> ()
+          | Some w ->
+              let s =
+                Text.get
+                  w
+                  (Tk.TextIndex(Tk.LineChar(1,0),[]))
+                  (Tk.TextIndex(Tk.End,[])) in
+              c # set_slot att_slot s
+
+  end
+;;
+
+(* Extension for "button" elements. Pressing the button first lets all
+ * nodes of the enclosing mask accept their current values, and then
+ * performs the action named by the "action" attribute:
+ *   goto:       go to the mask named by the "goto" attribute
+ *   save:       save the object
+ *   exit:       close Tk
+ *   save-exit:  save the object, then close Tk
+ *   list-prev / list-next:
+ *               go to the sub node of the parent of the mask that
+ *               precedes / follows the mask (nothing happens if there
+ *               is none)
+ *   hist-prev / hist-next:
+ *               call the "previous" / "next" method of the context,
+ *               ignoring Not_found
+ * Any other action does nothing.
+ *)
+class button =
+  object (self)
+    inherit shared
+
+    val mutable att_label = ""
+    val mutable att_action = ""
+    val mutable att_goto = ""
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_label <-
+        (match self # node # attribute "label" with
+             Value v -> v
+           | _ -> assert false);
+      att_action <-
+        (match self # node # attribute "action" with
+             Value v -> v
+           | _ -> assert false);
+      att_goto <-
+        (match self # node # attribute "goto" with
+             Value v -> v
+           | Implied_value -> ""
+           | _ -> assert false);
+      (* The target of a "goto" must exist in the index *)
+      if att_action = "goto" then begin
+        try let _ = idx # find att_goto in () with
+            Not_found -> failwith ("Target `" ^ att_goto ^ "' not found")
+      end;
+      (* The list actions only make sense if the enclosing mask is a
+       * direct sub node of a sequence
+       *)
+      if att_action = "list-prev" || att_action = "list-next" then begin
+        let m = self # get_mask in
+        if m # node # parent # node_type <> T_element "sequence" then
+          failwith ("action " ^ att_action ^
+                    " must not be used out of <sequence>")
+      end
+
+
+    method create_widget w c =
+      (* Go to the mask whose name is the "name" attribute of n *)
+      let goto_name n =
+        match n # attribute "name" with
+            Value s -> c # goto s
+          | _ -> assert false
+      in
+      let cmd () =
+        self # accept_mask c;
+        match att_action with
+            "goto" ->
+              c # goto att_goto
+          | "save" ->
+              c # save_obj
+          | "exit" ->
+              Protocol.closeTk()
+          | "save-exit" ->
+              c # save_obj;
+              Protocol.closeTk()
+          | "list-prev" ->
+              let m = self # get_mask # node in
+              (* Find the pair (x, y) of neighbours where y is the own
+               * mask (compared by identity), and go to x
+               *)
+              let rec search l =
+                match l with
+                    x :: (y :: _ as rest) ->
+                      if y == m then goto_name x else search rest
+                  | _ -> ()
+              in
+              search (m # parent # sub_nodes)
+          | "list-next" ->
+              let m = self # get_mask # node in
+              (* Find the pair (x, y) of neighbours where x is the own
+               * mask (compared by identity), and go to y
+               *)
+              let rec search l =
+                match l with
+                    x :: (y :: _ as rest) ->
+                      if x == m then goto_name y else search rest
+                  | _ -> ()
+              in
+              search (m # parent # sub_nodes)
+          | "hist-prev" ->
+              (try c # previous with Not_found -> ())
+          | "hist-next" ->
+              (try c # next with Not_found -> ())
+          | _ -> ()
+      in
+      Button.create w ( [ Tk.Text att_label; Tk.Command cmd ] @
+                        self # fg_color_opt @
+                        self # bg_color_opt @
+                        self # font_opt )
+
+
+  end
+;;
+
+
+(**********************************************************************)
+
+open Pxp_yacc
+
+(* The specification passed to the parser: it maps every element name
+ * of the style sheet to an exemplar node carrying the extension class
+ * for this element. Data nodes and all other elements get the
+ * "default" extension.
+ *)
+let tag_map =
+  let exemplars =
+    [ "application", new element_impl (new application);
+      "sequence",    new element_impl (new sequence);
+      "mask",        new element_impl (new mask);
+      "vbox",        new element_impl (new vbox);
+      "hbox",        new element_impl (new hbox);
+      "vspace",      new element_impl (new vspace);
+      "hspace",      new element_impl (new hspace);
+      "label",       new element_impl (new label);
+      "entry",       new element_impl (new entry);
+      "textbox",     new element_impl (new textbox);
+      "button",      new element_impl (new button);
+    ] in
+  let mapping = Hashtbl.create 50 in
+  List.iter
+    (fun (tag, exemplar) -> Hashtbl.add mapping tag exemplar)
+    exemplars;
+  make_spec_from_mapping
+    ~data_exemplar:(new data_impl (new default))
+    ~default_element_exemplar:(new element_impl (new default))
+    ~element_mapping:mapping
+    ()
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/08/30 15:58:49 gerd
+ * Updated.
+ *
+ * Revision 1.4 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.3 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.2 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile
new file mode 100644
index 000000000..c0068a59d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile
@@ -0,0 +1,16 @@
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+
+.PHONY: symlinks
+symlinks:
+ for x in *-style.xml; do ln -s ../xmlforms $${x%-style.xml} || true; done
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml
new file mode 100644
index 000000000..d3af5daa0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml
@@ -0,0 +1,361 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ More about person...
+
+
+
+
+
+
+
+
+
+ '>
+
+
+ Help
+
+
+
+
+
+
+
+
+
+ '>
+
+
+ About xmlforms
+
+
+
+
+
+
+
+
+
+ '>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '>
+
+
+
+]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A sample xmlforms application:
+ Address editor
+
+
+
+
+
+ Name:
+
+
+
+
+
+ Postal address:
+
+
+
+
+
+ Email:
+
+
+
+
+
+ Telephone number:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ &headline;
+
+ Department
+
+ The person is working in this department:
+
+
+
+
+
+ The project he/she is working for:
+
+
+
+
+
+
+
+
+
+
+
+
+
+ &footline;
+
+
+
+
+
+
+
+
+
+
+ &headline;
+
+ Business Contacts
+
+ Notes about contacts:
+
+
+
+
+
+
+
+
+
+
+
+
+
+ &footline;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ &help.headline;
+
+ Department
+
+ The help system should be designed to help you filling out your form, but
+writing help texts is so stupid...
+
+
+
+
+
+
+
+
+ &help.footline;
+
+
+
+
+
+
+
+
+
+ &help.headline;
+
+ Business Contacts
+
+ It is often helpful to remember the last telephone and/or email contacts
+quickly.
+
+
+
+
+
+
+
+
+ &help.footline;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ &info.headline;
+
+
+ ,
+written by Gerd Stolpmann
+
+Contact: Gerd.Stolpmann@darmstadt.netsurf.de
+]]>
+
+
+
+
+
+
+
+ &info.footline;
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/crazy-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/crazy-style.xml
new file mode 100644
index 000000000..cce8df007
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/crazy-style.xml
@@ -0,0 +1,62 @@
+
+
+
+ '>
+]
+>
+
+
+
+
+one
+Number two
+
+
+a1
+
+a2
+
+b
+c
+
+
+A Text
+
+A very long label, bigger than the box
+
+
+right
+
+
+
+
+
+
+
+
+
+
+
+n1
+&vz;
+
+
+n2
+&vz;
+
+
+n3
+&vz;
+
+
+n4
+&vz;
+
+
+n5
+&vz;
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-object.dtd b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-object.dtd
new file mode 100644
index 000000000..750300cd5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-object.dtd
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-style.dtd b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-style.dtd
new file mode 100644
index 000000000..2f8b7a278
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-style.dtd
@@ -0,0 +1,183 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/mini-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/mini-style.xml
new file mode 100644
index 000000000..844235cc4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/mini-style.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+This is a label
+
+
diff --git a/helm/DEVEL/pxp/pxp/lexers/Makefile b/helm/DEVEL/pxp/pxp/lexers/Makefile
new file mode 100644
index 000000000..63ade7050
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/Makefile
@@ -0,0 +1,34 @@
+all_iso88591: generate_iso88591
+ $(MAKE) -f Makefile.code all_iso88591
+
+opt_iso88591: generate_iso88591
+ $(MAKE) -f Makefile.code opt_iso88591
+
+all_utf8: generate_utf8
+ $(MAKE) -f Makefile.code all_utf8
+
+opt_utf8: generate_utf8
+ $(MAKE) -f Makefile.code opt_utf8
+
+
+
+generate_iso88591:
+ $(MAKE) -f Makefile.generate all_iso88591
+ rm -f objects_iso88591 objects_utf8
+ $(MAKE) -f Makefile.generate objects_iso88591
+ touch objects_utf8
+ $(MAKE) -f Makefile.generate depend
+
+generate_utf8:
+ $(MAKE) -f Makefile.generate all_utf8
+ rm -f objects_iso88591 objects_utf8
+ $(MAKE) -f Makefile.generate objects_utf8
+ touch objects_iso88591
+ $(MAKE) -f Makefile.generate depend
+
+
+
+clean:
+ touch depend objects
+ $(MAKE) -f Makefile.code clean
+ $(MAKE) -f Makefile.generate clean
diff --git a/helm/DEVEL/pxp/pxp/lexers/Makefile.code b/helm/DEVEL/pxp/pxp/lexers/Makefile.code
new file mode 100644
index 000000000..781f29669
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/Makefile.code
@@ -0,0 +1,54 @@
+
+LARCHIVE_iso88591 = pxp_lex_iso88591.cma
+LARCHIVE_utf8 = pxp_lex_utf8.cma
+XLARCHIVE_iso88591 = $(LARCHIVE_iso88591:.cma=.cmxa)
+XLARCHIVE_utf8 = $(LARCHIVE_utf8:.cma=.cmxa)
+
+# LOBJECTS_* and XLOBJECTS_* are included from "objects_*":
+include objects_iso88591
+include objects_utf8
+
+#----------------------------------------------------------------------
+
+all_iso88591: $(LARCHIVE_iso88591)
+opt_iso88591: $(XLARCHIVE_iso88591)
+all_utf8: $(LARCHIVE_utf8)
+opt_utf8: $(XLARCHIVE_utf8)
+
+$(LARCHIVE_iso88591): $(LOBJECTS_iso88591)
+ $(OCAMLC) -a -o $(LARCHIVE_iso88591) $(LOBJECTS_iso88591)
+
+$(XLARCHIVE_iso88591): $(XLOBJECTS_iso88591)
+ $(OCAMLOPT) -a -o $(XLARCHIVE_iso88591) $(XLOBJECTS_iso88591)
+
+$(LARCHIVE_utf8): $(LOBJECTS_utf8)
+ $(OCAMLC) -a -o $(LARCHIVE_utf8) $(LOBJECTS_utf8)
+
+$(XLARCHIVE_utf8): $(XLOBJECTS_utf8)
+ $(OCAMLOPT) -a -o $(XLARCHIVE_utf8) $(XLOBJECTS_utf8)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS)
+OCAMLOPT = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS)
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+
+*.mli:
+
+clean:
+ rm -f *.cmo *.cmx *.cma *.cmxa *.cmi *.o *.a
+
+include depend
diff --git a/helm/DEVEL/pxp/pxp/lexers/Makefile.generate b/helm/DEVEL/pxp/pxp/lexers/Makefile.generate
new file mode 100644
index 000000000..8ee39cb44
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/Makefile.generate
@@ -0,0 +1,67 @@
+LEXERSRC = pxp_lex_misc.src \
+ pxp_lex_document.src \
+ pxp_lex_content.src \
+ pxp_lex_within_tag.src \
+ pxp_lex_document_type.src \
+ pxp_lex_declaration.src \
+ pxp_lex_dtd_string.src \
+ pxp_lex_content_string.src \
+ pxp_lex_name_string.src
+
+OTHERSRC = open_pxp_lex_aux_iso88591.src \
+ pxp_lex_aux.src \
+ pxp_lex_defs_iso88591.def
+
+LEXERMLL_iso88591 = $(LEXERSRC:.src=_iso88591.mll)
+LEXERMLL_utf8 = $(LEXERSRC:.src=_utf8.mll)
+
+LEXERML_iso88591 = $(LEXERSRC:.src=_iso88591.ml)
+LEXERML_utf8 = $(LEXERSRC:.src=_utf8.ml)
+
+LEXERCMO_iso88591 = pxp_lex_aux_iso88591.cmo $(LEXERSRC:.src=_iso88591.cmo)
+LEXERCMO_utf8 = pxp_lex_aux_utf8.cmo $(LEXERSRC:.src=_utf8.cmo)
+
+LEXERCMX_iso88591 = $(LEXERCMO_iso88591:.cmo=.cmx)
+LEXERCMX_utf8 = $(LEXERCMO_utf8:.cmo=.cmx)
+
+.PHONY: all_iso88591
+all_iso88591: iso88591_done
+
+.PHONY: all_utf8
+all_utf8: utf8_done
+
+iso88591_done: $(LEXERSRC) $(OTHERSRC)
+ ../tools/insert_variant -variant iso88591 $(LEXERSRC)
+ for file in $(LEXERMLL_iso88591); do ocamllex $$file; done
+ touch iso88591_done
+
+utf8_done: $(LEXERSRC) $(OTHERSRC) pxp_lex_defs_utf8.def
+ ../tools/insert_variant -variant utf8 $(LEXERSRC)
+ for file in $(LEXERMLL_utf8); do ocamllex $$file; done
+ touch utf8_done
+
+# The UTF-8 definitions are generated from the generic definitions; the
+# driver-specific definitions are appended afterwards.
+# NOTE(review): assumes that ucs2_to_utf8 is a filter reading stdin and
+# writing stdout -- confirm against ../tools/ucs2_to_utf8.
+pxp_lex_defs_utf8.def: pxp_lex_defs_generic.def pxp_lex_defs_drv_utf8.def
+	../tools/ucs2_to_utf8/ucs2_to_utf8 <pxp_lex_defs_generic.def \
+		>pxp_lex_defs_utf8.def || \
+		rm -f pxp_lex_defs_utf8.def
+	cat pxp_lex_defs_drv_utf8.def >>pxp_lex_defs_utf8.def
+
+objects_iso88591:
+ echo LOBJECTS_iso88591 = $(LEXERCMO_iso88591) >objects_iso88591
+ echo XLOBJECTS_iso88591 = $(LEXERCMX_iso88591) >>objects_iso88591
+
+objects_utf8:
+ echo LOBJECTS_utf8 = $(LEXERCMO_utf8) >objects_utf8
+ echo XLOBJECTS_utf8 = $(LEXERCMX_utf8) >>objects_utf8
+
+depend: *.ml *.mli
+ ocamldep *.ml *.mli >depend
+
+.PHONY: clean
+clean:
+ rm -f $(LEXERMLL_iso88591) $(LEXERML_iso88591) iso88591_done \
+ $(LEXERMLL_utf8) $(LEXERML_utf8) utf8_done \
+ pxp_lex_defs_utf8.def \
+ objects_iso88591 objects_utf8 depend
+
+*.mli:
diff --git a/helm/DEVEL/pxp/pxp/lexers/objects b/helm/DEVEL/pxp/pxp/lexers/objects
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_iso88591.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_iso88591.src
new file mode 100644
index 000000000..2377affe4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_iso88591.src
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_aux_iso88591
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_utf8.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_utf8.src
new file mode 100644
index 000000000..7c1b12a4e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_utf8.src
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_aux_utf8
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_iso88591.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_iso88591.src
new file mode 100644
index 000000000..104eb9bc1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_iso88591.src
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_misc_iso88591
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_utf8.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_utf8.src
new file mode 100644
index 000000000..fc545f2c1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_utf8.src
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_misc_utf8
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux.src
new file mode 100644
index 000000000..2ab21a3b3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux.src
@@ -0,0 +1,82 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+ (* An object without methods. Its only instance, coerced to entity_id,
+ * is the entity argument of the pre-allocated tokens below that carry
+ * an entity ID.
+ *)
+ class dummy_entity = object end
+
+ let dummy_entity = ( new dummy_entity : entity_id )
+
+ (* The following tokens are pre-allocated to reduce the load on the
+ * GC.
+ *)
+
+ let tok_Doctype__Document_type = Doctype dummy_entity, Document_type
+ let tok_Ignore__Document = Ignore, Document
+ let tok_Ignore__Within_tag = Ignore, Within_tag
+ let tok_Ignore__Document_type = Ignore, Document_type
+ let tok_Ignore__Declaration = Ignore, Declaration
+ let tok_Ignore__Ignored = Ignore, Ignored_section
+ let tok_Eof__Document = Eof, Document
+ let tok_Eof__Content = Eof, Content
+ let tok_Eof__Within_tag = Eof, Within_tag
+ let tok_Eof__Document_type = Eof, Document_type
+ let tok_Eof__Declaration = Eof, Declaration
+ let tok_Eof__Ignored = Eof, Ignored_section
+ let tok_LineEndCRLF__Content = LineEnd "\r\n", Content
+ let tok_LineEndCR__Content = LineEnd "\r", Content
+ let tok_LineEndLF__Content = LineEnd "\n", Content
+ let tok_CharDataRBRACKET__Content = CharData "]", Content
+ let tok_Eq__Within_tag = Eq, Within_tag
+ let tok_Rangle__Content = Rangle, Content
+ let tok_Rangle_empty__Content = Rangle_empty, Content
+ let tok_Dtd_begin__Declaration = Dtd_begin dummy_entity, Declaration
+ let tok_Doctype_rangle__Document = Doctype_rangle dummy_entity, Document
+ let tok_Percent__Declaration = Percent, Declaration
+ let tok_Plus__Declaration = Plus, Declaration
+ let tok_Star__Declaration = Star, Declaration
+ let tok_Bar__Declaration = Bar, Declaration
+ let tok_Comma__Declaration = Comma, Declaration
+ let tok_Qmark__Declaration = Qmark, Declaration
+ let tok_Lparen__Declaration = Lparen dummy_entity, Declaration
+ let tok_RparenPlus__Declaration = RparenPlus dummy_entity, Declaration
+ let tok_RparenStar__Declaration = RparenStar dummy_entity, Declaration
+ let tok_RparenQmark__Declaration = RparenQmark dummy_entity, Declaration
+ let tok_Rparen__Declaration = Rparen dummy_entity, Declaration
+ let tok_Required__Declaration = Required, Declaration
+ let tok_Implied__Declaration = Implied, Declaration
+ let tok_Fixed__Declaration = Fixed, Declaration
+ let tok_Pcdata__Declaration = Pcdata, Declaration
+ let tok_Decl_element__Declaration = Decl_element dummy_entity, Declaration
+ let tok_Decl_attlist__Declaration = Decl_attlist dummy_entity, Declaration
+ let tok_Decl_entity__Declaration = Decl_entity dummy_entity, Declaration
+ let tok_Decl_notation__Declaration = Decl_notation dummy_entity, Declaration
+ let tok_Conditional_begin__Declaration = Conditional_begin dummy_entity,
+ Declaration
+ let tok_Conditional_begin__Ignored = Conditional_begin dummy_entity,
+ Ignored_section
+ let tok_Conditional_end__Declaration = Conditional_end dummy_entity,
+ Declaration
+ let tok_Conditional_end__Ignored = Conditional_end dummy_entity,
+ Ignored_section
+ let tok_Conditional_body__Declaration = Conditional_body dummy_entity,
+ Declaration
+ let tok_Decl_rangle__Declaration = Decl_rangle dummy_entity, Declaration
+ let tok_Dtd_end__Document_type = Dtd_end dummy_entity, Document_type
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_iso88591.ml b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_iso88591.ml
new file mode 100644
index 000000000..07f8c45cd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_iso88591.ml
@@ -0,0 +1,97 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_utf8 *)
+
+ open Pxp_types
+ open Pxp_lexer_types
+
+ let get_name_end s k =
+   (* Scans s forward from index k and returns the index of the first
+    * whitespace character (TAB, LF, CR or space) found, i.e. the index
+    * just behind the name that begins at k. If there is no such
+    * character, the length of s is returned.
+    *)
+   let len = String.length s in
+   let is_ws c = (c = '\t' || c = '\n' || c = '\r' || c = ' ') in
+   let rec scan pos =
+     if pos >= len then len
+     else if is_ws s.[pos] then pos
+     else scan (pos + 1)
+   in
+   scan k
+
+ let get_ws_end s k =
+   (* Skips the run of whitespace (space, TAB, CR, LF) that begins at
+    * index k of s and returns the index of the first character behind
+    * that run. If only whitespace follows, or if k is not less than the
+    * length of s, the length of s is returned.
+    *)
+   let len = String.length s in
+   let is_ws c = (c = ' ' || c = '\t' || c = '\r' || c = '\n') in
+   let pos = ref k in
+   while !pos < len && is_ws s.[!pos] do
+     incr pos
+   done;
+   if !pos < len then !pos else len
+
+ let scan_pi pi xml_scanner =
+   (* Converts the text of a processing instruction into a token.
+    *
+    * pi is the complete instruction, including the leading "<?" and the
+    * trailing "?>". xml_scanner is the lexer function that is applied to
+    * the text between these delimiters.
+    *
+    * Result: PI_xml toks if the name of the instruction is "xml", where
+    * toks are the tokens xml_scanner returns behind the name, up to but
+    * not including Pro_eof. Otherwise PI(name, param), where param is the
+    * text that follows the lexeme of the name token (possibly "").
+    *
+    * Raises WF_error if the first token is not a Pro_name, or if the
+    * lexeme of this token is exactly as long as the name, i.e. there is
+    * no whitespace after the name.
+    *)
+   let bad_pi () = raise (WF_error ("Bad processing instruction")) in
+
+   (* Cut off the two delimiter characters at either end. *)
+   let body = String.sub pi 2 (String.length pi - 4) in
+
+   (* The lexer expects whitespace after every clause; the appended space
+    * guarantees that there is whitespace at the end of the string.
+    *)
+   let lexbuf = Lexing.from_string (body ^ " ") in
+
+   (* The first word must be a name. consumed is the length of the name
+    * plus the length of the whitespace following the name.
+    *)
+   let target, consumed =
+     match xml_scanner lexbuf with
+       Pro_name n ->
+         let ltok = String.length (Lexing.lexeme lexbuf) in
+         if String.length n = ltok then bad_pi () else (n, ltok)
+     | _ ->
+         bad_pi ()
+   in
+
+   if target = "xml" then begin
+     (* Read the remaining tokens, in order, until Pro_eof is found. *)
+     let rec gather acc =
+       let t = xml_scanner lexbuf in
+       if t = Pro_eof then List.rev acc else gather (t :: acc)
+     in
+     PI_xml (gather [])
+   end
+   else begin
+     (* consumed may count the space appended above, so the difference
+      * can be -1.
+      *)
+     let len_param = String.length body - consumed in
+     if len_param >= 1 then
+       PI(target, String.sub body consumed len_param)
+     else
+       PI(target, "")
+   end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml
new file mode 100644
index 000000000..0b2c577e9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml
@@ -0,0 +1,95 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_iso88591 *)
+
+ open Pxp_types
+ open Pxp_lexer_types
+
+ let get_name_end s k =
+   (* Scans s forward from index k and returns the index of the first
+    * whitespace character (TAB, LF, CR or space) found, i.e. the index
+    * just behind the name that begins at k. If there is no such
+    * character, the length of s is returned.
+    *)
+   let len = String.length s in
+   let is_ws c = (c = '\t' || c = '\n' || c = '\r' || c = ' ') in
+   let rec scan pos =
+     if pos >= len then len
+     else if is_ws s.[pos] then pos
+     else scan (pos + 1)
+   in
+   scan k
+
+ let get_ws_end s k =
+   (* Skips the run of whitespace (space, TAB, CR, LF) that begins at
+    * index k of s and returns the index of the first character behind
+    * that run. If only whitespace follows, or if k is not less than the
+    * length of s, the length of s is returned.
+    *)
+   let len = String.length s in
+   let is_ws c = (c = ' ' || c = '\t' || c = '\r' || c = '\n') in
+   let pos = ref k in
+   while !pos < len && is_ws s.[!pos] do
+     incr pos
+   done;
+   if !pos < len then !pos else len
+
+ let scan_pi pi xml_scanner =
+   (* Converts the text of a processing instruction into a token.
+    *
+    * pi is the complete instruction, including the leading "<?" and the
+    * trailing "?>". xml_scanner is the lexer function that is applied to
+    * the text between these delimiters.
+    *
+    * Result: PI_xml toks if the name of the instruction is "xml", where
+    * toks are the tokens xml_scanner returns behind the name, up to but
+    * not including Pro_eof. Otherwise PI(name, param), where param is the
+    * text that follows the lexeme of the name token (possibly "").
+    *
+    * Raises WF_error if the first token is not a Pro_name, or if the
+    * lexeme of this token is exactly as long as the name, i.e. there is
+    * no whitespace after the name.
+    *)
+   let bad_pi () = raise (WF_error ("Bad processing instruction")) in
+
+   (* Cut off the two delimiter characters at either end. *)
+   let body = String.sub pi 2 (String.length pi - 4) in
+
+   (* The lexer expects whitespace after every clause; the appended space
+    * guarantees that there is whitespace at the end of the string.
+    *)
+   let lexbuf = Lexing.from_string (body ^ " ") in
+
+   (* The first word must be a name. consumed is the length of the name
+    * plus the length of the whitespace following the name.
+    *)
+   let target, consumed =
+     match xml_scanner lexbuf with
+       Pro_name n ->
+         let ltok = String.length (Lexing.lexeme lexbuf) in
+         if String.length n = ltok then bad_pi () else (n, ltok)
+     | _ ->
+         bad_pi ()
+   in
+
+   if target = "xml" then begin
+     (* Read the remaining tokens, in order, until Pro_eof is found. *)
+     let rec gather acc =
+       let t = xml_scanner lexbuf in
+       if t = Pro_eof then List.rev acc else gather (t :: acc)
+     in
+     PI_xml (gather [])
+   end
+   else begin
+     (* consumed may count the space appended above, so the difference
+      * can be -1.
+      *)
+     let len_param = String.length body - consumed in
+     if len_param >= 1 then
+       PI(target, String.sub body consumed len_param)
+     else
+       PI(target, "")
+   end
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src
new file mode 100644
index 000000000..3df202568
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src
@@ -0,0 +1,107 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* NOTE(review): the text of these rules arrived damaged. Every "<?" had
+ * been reduced to "", and everything between the pattern that opens a
+ * comment and the "-->" pattern had collapsed into one "" case, so that
+ * the comment cases appeared as a part of scan_content.
+ *
+ * Restored here: the two "<?" patterns (scan_pi cuts off two leading
+ * characters and the trailing "?>"), and the split into a separate rule
+ * for the inside of comments.
+ *
+ * NOT restored: all further cases of scan_content, including the case
+ * that recognizes the beginning of a comment. They cannot be derived
+ * from this text and must be taken from the upstream sources before
+ * this lexer is used.
+ *)
+
+rule scan_content = parse
+    "<?" pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
+  | "<?"
+      { (* Only reached if the complete pattern above does not match,
+         * because the longest match wins.
+         *)
+        raise (WF_error ("Illegal processing instruction")) }
+
+
+(* The inside of a comment occurring in content. The text of the comment
+ * is returned piecewise as Comment_material tokens.
+ * NOTE(review): the name of this rule is inferred from the sibling rules
+ * scan_decl_comment and scan_document_comment and from the Content_comment
+ * state; confirm it against the callers.
+ *)
+
+and scan_content_comment = parse
+    "-->"
+      { Comment_end, Content }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | "-"
+      { Comment_material "-", Content_comment }
+  | character_except_minus+
+      { Comment_material(Lexing.lexeme lexbuf), Content_comment }
+  | eof
+      { Eof, Content_comment }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* The inside of a comment occurring within declarations.
+ *
+ * In declarations, comments are always thrown away: the Comment_material
+ * tokens returned by this rule carry the empty string and not the matched
+ * text.
+ *
+ * Every result is a pair. The end of the comment is paired with
+ * Declaration, all other tokens are paired with Decl_comment.
+ * The longest match wins, so the "--" case is only selected if the two
+ * hyphens are not followed by the closing angle bracket.
+ * The last case is reached for input that is matched neither by a
+ * hyphen nor by character_except_minus.
+ *)
+
+and scan_decl_comment = parse
+ "-->"
+ { Comment_end, Declaration }
+ | "--"
+ { raise (WF_error "Double hyphens are illegal inside comments") }
+ | "-"
+ { Comment_material "", Decl_comment }
+ | character_except_minus+
+ { Comment_material "", Decl_comment }
+ | eof
+ { Eof, Decl_comment }
+ | _
+ { raise Netconversion.Malformed_code }
+
+
+(* The inside of a comment occurring at document level.
+ *
+ * The text of the comment is returned piecewise: a single hyphen and every
+ * run of other characters become Comment_material tokens of their own.
+ *
+ * Every result is a pair. The end of the comment is paired with Document,
+ * all other tokens are paired with Document_comment.
+ * The longest match wins, so the "--" case is only selected if the two
+ * hyphens are not followed by the closing angle bracket.
+ *)
+
+and scan_document_comment = parse
+ "-->"
+ { Comment_end, Document }
+ | "--"
+ { raise (WF_error "Double hyphens are illegal inside comments") }
+ | "-"
+ { Comment_material "-", Document_comment }
+ | character_except_minus+
+ { Comment_material(Lexing.lexeme lexbuf), Document_comment }
+ | eof
+ { Eof, Document_comment }
+ | _
+ { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_name_string.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_name_string.src
new file mode 100644
index 000000000..57a0d54ce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_name_string.src
@@ -0,0 +1,77 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* Splits a string into names, name tokens and single characters.
+ *
+ * Unlike the rules that scan documents, this rule returns the plain token
+ * and not a pair.
+ *
+ * - name:      Name with the matched text
+ * - ws+:       Ignore (the whole run of whitespace is one token)
+ * - nmtoken:   Nametoken with the matched text
+ * - eof:       Eof
+ * - character: CharData with the matched character
+ * - otherwise: Netconversion.Malformed_code is raised
+ *
+ * The longest match wins, and among matches of the same length the case
+ * that comes first. NOTE(review): presumably every name is also matched
+ * by nmtoken, so the order of the two cases makes such text a Name;
+ * confirm this against the definitions inserted above.
+ *)
+
+rule scan_name_string = parse
+ name
+ { Name (Lexing.lexeme lexbuf) }
+ | ws+
+ { Ignore }
+ | nmtoken
+ { Nametoken (Lexing.lexeme lexbuf) }
+ | eof
+ { Eof }
+ | character
+ { CharData (Lexing.lexeme lexbuf) }
+ | _
+ { raise Netconversion.Malformed_code }
+
+
+(* The inside of an ignored conditional section.
+ *
+ * Only the delimiters of conditional sections are reported; everything
+ * else is returned as tok_Ignore__Ignored. Quoted strings are matched as
+ * a whole (presumably so that delimiters inside of them are not seen).
+ * The single-character cases at the end catch the characters that are
+ * matched neither by character_except_special nor by a longer pattern.
+ *
+ * NOTE(review): this text arrived damaged. The first two cases had been
+ * fused into one case with the pattern "" and the result
+ * tok_Conditional_end__Ignored; they are restored here with the
+ * delimiters of conditional sections and with
+ * tok_Conditional_begin__Ignored.
+ * The pattern of the third case is still the damaged "". The lost text
+ * began with an opening and ended with a closing angle bracket on one
+ * line, so it was probably the pattern for a complete comment. It must be
+ * restored from the upstream sources.
+ *)
+
+and scan_ignored_section = parse
+  | "<!["
+      { tok_Conditional_begin__Ignored }
+  | "]]>"
+      { tok_Conditional_end__Ignored }
+  | ""
+      { tok_Ignore__Ignored }
+  | '"' character_except_quot* '"'
+      { tok_Ignore__Ignored }
+  | "'" character_except_apos* "'"
+      { tok_Ignore__Ignored }
+  | eof
+      { tok_Eof__Ignored }
+  | character_except_special+
+      { tok_Ignore__Ignored }
+  | "<"
+      { tok_Ignore__Ignored }
+  | "]"
+      { tok_Ignore__Ignored }
+  | "'"
+      { tok_Ignore__Ignored }
+  | "\""
+      { tok_Ignore__Ignored }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_within_tag.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_within_tag.src
new file mode 100644
index 000000000..39697b00a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_within_tag.src
@@ -0,0 +1,69 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+(* The inside of a tag, i.e. the attribute list.
+ *
+ * Every result is a pair. Most tokens are paired with Within_tag; the two
+ * cases that close the tag return the prepared pairs tok_Rangle__Content
+ * and tok_Rangle_empty__Content.
+ *
+ * Attribute values are returned as Attval without the quotation marks
+ * that delimit them. A quotation mark that is not the beginning of a
+ * complete quoted value is a well-formedness error: because the longest
+ * match wins, the single-character cases are only selected if the
+ * complete pattern does not match.
+ *
+ * Any other character raises WF_error; input not matched by character
+ * raises Netconversion.Malformed_code.
+ *)
+
+rule scan_within_tag = parse
+ ws+
+ { tok_Ignore__Within_tag }
+ | name
+ { Name (Lexing.lexeme lexbuf ), Within_tag }
+ | '='
+ { tok_Eq__Within_tag }
+ | '"' character_except_quot* '"'
+ { let s = Lexing.lexeme lexbuf in
+ (* Cut off the first and the last character: the quotation marks *)
+ let v = String.sub s 1 (String.length s - 2) in
+ Attval v, Within_tag }
+ | '"'
+ { raise (WF_error ("Cannot find the second quotation mark"))
+ }
+ | "'" character_except_apos* "'"
+ { let s = Lexing.lexeme lexbuf in
+ (* Cut off the first and the last character: the quotation marks *)
+ let v = String.sub s 1 (String.length s - 2) in
+ Attval v, Within_tag }
+ | "'"
+ { raise (WF_error ("Cannot find the second quotation mark"))
+ }
+ | '>'
+ { tok_Rangle__Content }
+ | "/>"
+ { tok_Rangle_empty__Content }
+ | eof
+ { tok_Eof__Within_tag }
+ | character
+ { raise (WF_error ("Illegal inside tags")) }
+ | _
+ { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/Makefile b/helm/DEVEL/pxp/pxp/m2parsergen/Makefile
new file mode 100644
index 000000000..78f5359c8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/Makefile
@@ -0,0 +1,62 @@
+# make all: make bytecode executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+#             (here CLEAN has no commands of its own; it only runs clean)
+# make distclean: like clean, and also remove backup files, the files
+#             depend and depend.pkg, and the m2parsergen executable
+# make depend: write the output of ocamldep for $(SRC) to the file depend
+
+#----------------------------------------------------------------------
+
+# lexer.ml and parser.ml are generated files: clean removes them, and
+# the suffix rules at the end create .ml files with ocamllex and ocamlyacc.
+SRC = ast.ml lexer.ml parser.ml generator.ml
+OBJ = $(SRC:.ml=.cmo)
+
+#----------------------------------------------------------------------
+
+
+.PHONY: all
+all: m2parsergen
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa lexer.ml parser.ml \
+ parser.mli
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg m2parsergen a.out x.ml
+
+#----------------------------------------------------------------------
+# general rules:
+
+# OPTIONS is empty by default. ROPTIONS is not set in this file; if it is
+# not passed in from outside it expands to nothing.
+# OCAMLFIND is defined but not used by any rule of this file.
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+#----------------------------------------------------------------------
+
+depend: $(SRC)
+ $(OCAMLDEP) $(SRC) >depend
+
+# Only the bytecode executable is linked; there is no target that links
+# the .cmx files.
+m2parsergen: $(OBJ)
+ $(OCAMLC) -o m2parsergen $(OBJ)
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+.mly.ml:
+ ocamlyacc $<
+
+# The file depend is created by the rule above.
+include depend
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/README b/helm/DEVEL/pxp/pxp/m2parsergen/README
new file mode 100644
index 000000000..cccf7aa55
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/README
@@ -0,0 +1,319 @@
+----------------------------------------------------------------------
+m2parsergen
+----------------------------------------------------------------------
+
+This is a parser generator for top-down (or recursively descending) parsers.
+The input file must be structured as follows:
+
+---------------------------------------- Begin of file
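+
+<OCaml text, copied verbatim to the output>
+
+%%
+
+<declarations>
+
+%%
+
+<rules>
+
+%%
+
+<OCaml text, copied verbatim to the output>
+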
+---------------------------------------- End of file
+
+The two-character combination %% separates the various sections. The
+text before the first %% and after the last %% will be copied verbatim
+to the output file.
+
+Within the declarations and rules sections you must use /* ... */ as
+comment braces.
+
+There are two types of declarations:
+
+%token Name
+
+declares that Name is a token without associated value, and
+
+%token <> Name
+
+declares that Name is a token with associated value (i.e. Name x).
+
+In contrast to ocamlyacc, you need not specify a type. This is a
+fundamental difference, because m2parsergen will not generate a type
+declaration for a "token" type; you must do this yourself.
+
+You need not declare start symbols; every grammar rule may be used
+as start symbol.
+
+The rules look like:
+
+name_of_rule(arg1, arg2, ...):
+ label1:symbol1 label2:symbol2 ... {{ CODE }}
+| label1:symbol1 label2:symbol2 ... {{ CODE }}
+...
+| label1:symbol1 label2:symbol2 ... {{ CODE }}
+
+The rules may have arguments (note that you must write the
+parentheses, even if the rule does not have arguments). Here, arg1,
+arg2, ... are the formal names of the arguments; you may refer to them
+in OCaml code.
+
+Furthermore, the symbols may have labels (you can leave the labels
+out). You can refer to the value associated with a symbol by its
+label, i.e. there is an OCaml variable with the same name as the label
+prescribes, and this variable contains the value.
+
+The OCaml code must be embraced by {{ and }}, and these separators
+must not occur within the code.
+
+EXAMPLE:
+
+prefix_term():
+ Plus_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
+ {{ v1 + v2 }}
+| Times_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
+ {{ v1 * v2 }}
+| n:Number
+ {{ n }}
+
+As you can see in the example, you must pass values for the arguments
+if you call non-terminal symbols (here, the argument list is empty: ()).
+
+The generated parsers behave as follows:
+
+- A rule is applicable to a token sequence if the first token is
+ matched by the rule.
+
+ In the example: prefix_term is applicable if the first token of a
+ sequence is either Plus_symbol, Times_symbol, or Number.
+
+- One branch of the applicable rule is selected: it is the first
+ branch that matches the first token. THE OTHER TOKENS DO NOT HAVE
+ ANY EFFECT ON BRANCH SELECTION!
+
+ For instance, in the following rule the second branch is never
+ selected, because only the A is used to select the branch:
+
+ a():
+ A B {{ ... }}
+ | A C {{ ... }}
+
+- Once a branch is selected, it is checked whether the branch matches
+ the token sequence. If this check succeeds, the code section of the
+ branch is executed, and the resulting value is returned to the
+ caller.
+ If the check fails, the exception Parsing.Parse_error is raised.
+ Normally, this exception is not caught, and will force the parser
+ to stop.
+
+ The check in detail:
+
+ If the rule demands a terminal, there must be exactly this
+ terminal at the corresponding location in the token sequence.
+
+ If the rule demands a non-terminal, it is checked whether the rule
+ for this non-terminal is applicable. If so, the branch
+ is selected, and recursively checked. If the rule is not applicable,
+ the check fails immediately.
+
+- THERE IS NO BACKTRACKING!
+
+ Note that the following works (but the construction is resolved at
+ generation time):
+
+ rule1():
+ rule2() A B ... {{ ... }}
+
+ rule2():
+ C {{ ... }}
+ | D {{ ... }}
+
+ In this case, the (only) branch of rule1 is selected if the next
+ token is C or D.
+
+---
+
+
+
+*** Options and repetitions ***
+
+Symbols can be tagged as being optional, or to occur repeatedly:
+
+rule():
+ Name whitespace()* Question_mark?
+
+- "*": The symbol matches zero or more occurrences.
+
+- "?": The symbol matches zero or one occurrence.
+
+This is done as follows:
+
+- terminal*: The maximum number of consecutive tokens <terminal> are
+ matched.
+- non-terminal*: The maximum number of the subsequences matching
+ <non-terminal> are matched. Before another
+ subsequence is matched, it is checked whether the
+ rule for <non-terminal> is applicable. If so, the
+ rule is invoked and must succeed (otherwise
+ Parsing.Parse_error is raised). If not, the loop is exited.
+
+- terminal?: If the next token is <terminal>, it is matched. If not,
+ no token is matched.
+
+- non-terminal?: It is checked whether the rule for <non-terminal>
+ is applicable. If so, the rule is invoked, and
+ matches a sequence of tokens. If not, no token is
+ matched.
+
+You may refer to repeated or optional symbols by labels. In this case,
+the label is associated with lists of values, or optional values,
+respectively:
+
+rule():
+ A lab:other()* lab':unlikely()?
+ {{ let n = List.length lab in ...
+ match lab' with
+ None -> ...
+ | Some v -> ...
+ }}
+
+A different scheme is applied if the symbol is a token without
+associated value (%token Name, and NOT %token <> Name):
+
+rule():
+ A lab:B* lab':C?
+
+Here, "lab" becomes an integer variable counting the number of Bs, and
+"lab'" becomes a boolean variable denoting whether there is a C or not.
+
+
+*** Early let-binding ***
+
+You may put some OCaml code directly after the first symbol of a
+branch:
+
+rule():
+ A $ {{ let-binding }} C D ... {{ ... }}
+
+The code brace {{ let-binding }} must be preceded by a dollar
+sign. You can put "let ... = ... in" statements into this brace:
+
+rule1():
+ n:A $ {{ let twice = 2 * n in }} rule2(twice) {{ ... }}
+
+This code is executed once the branch is selected.
+
+
+*** Very early let-binding ***
+
+This is also possible:
+
+rule():
+ $ {{ CODE }}
+ A
+ ...
+
+The CODE is executed right when the branch is selected, and before any
+other action happens. (Only for hacks!)
+
+
+
+*** Computed rules ***
+
+rule():
+ A $ {{ let followup = ... some function ... in }} [ followup ]()
+ {{ ... }}
+
+Between [ and ], you can refer to the O'Caml name of *any* function.
+Here, the function "followup" is bound in the let-binding.
+
+
+*** Error handling ***
+
+If a branch is already selected, but the check fails whether the other
+symbols of the branch match, it is possible to catch the resulting
+exception and to find out at which position the failure has occurred.
+
+rule():
+ x:A y:B z:C {{ ... }} ? {{ ERROR-CODE }}
+
+After a question mark, it is allowed to append another code
+brace. This code is executed if the branch check fails (but not if the
+branch is not selected nor if no branches are selected). The string
+variable !yy_position contains the label of the symbol that caused the
+failure (or it contains the empty string if the symbol does not have a
+label).
+
+Example:
+
+rule():
+ x:A y:B z:C {{ print_endline "SUCCESS" }} ? {{ print_endline !yy_position }}
+
+If the token sequence is A B C, "SUCCESS" will be printed. If the
+sequence is A C, the second symbol fails, and "y" will be printed. If
+the sequence is A B D, the third symbol fails, and "z" will be
+printed. If the sequence is B, the rule will be never selected because
+it is not applicable.
+
+
+
+*** Error recovery ***
+
+You may call the functions yy_current, yy_get_next, or one of the
+parse_* functions in the error brace to recover from the error
+(e.g. to move ahead until a certain token is reached). See below.
+
+
+
+*** How to call the parser ***
+
+The rules are rewritten into an OCaml let-binding:
+
+let rec parse_<rule1> ... = ...
+ and parse_<rule2> ... = ...
+ ...
+ and parse_<ruleN> ... = ...
+in
+
+i.e. there are lots of functions, and the name of the functions are
+"parse_" plus the name of the rules. You can call every function.
+
+The first two arguments of the functions have a special meaning; the
+other arguments are the arguments coming from the rule description:
+
+rule(a,b):
+ ...
+
+===>
+
+let rec parse_rule yy_current yy_get_next a b = ...
+
+The first argument, yy_current, is a function that returns the current
+token. The second argument, yy_get_next, is a function that switches
+to the next token, and returns it.
+
+If the tokens are stored in a list, this may be a definition:
+
+let input = ref [ Token1; Token2; ... ] in
+let yy_current() = List.hd !input in
+let yy_get_next () =
+ input := List.tl !input;
+ List.hd !input
+
+When you call one of the parser functions, the current token must
+already be loaded, i.e. yy_current returns the first token to match by
+the function.
+
+After the function has returned, the current token is the token
+following the sequence of tokens that have been matched by the
+function.
+
+The function returns the value computed by the OCaml code brace of the
+rule (or the value of the error brace).
+
+If the rule is not applicable, the exception Not_found is raised.
+
+If the rule is applicable, but it does not match, the exception
+Parsing.Parse_error is raised.
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/ast.ml b/helm/DEVEL/pxp/pxp/m2parsergen/ast.ml
new file mode 100644
index 000000000..219dd0c80
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/ast.ml
@@ -0,0 +1,75 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* A token declaration. The generator treats a D_typed_token as a token
+ * that carries a value, and a D_token as a token without value.
+ *)
+type declaration =
+ D_token of string (* D_token name *)
+ | D_typed_token of string (* D_typed_token name *)
+;;
+
+(* A symbol occurring in a branch of a rule. In all three cases the last
+ * component is the optional label of the symbol.
+ *)
+type symbol =
+ U_symbol of (string * string option) (* U_symbol(token, label) *)
+ | L_symbol of (string * string list * string option)
+ (* L_symbol(rule_name, args, label): invocation of another rule *)
+ | L_indirect of (string * string list * string option)
+ (* L_indirect(ml_name, args, label): invocation of the OCaml function
+  * ml_name instead of a rule
+  *)
+;;
+
+
+(* How often a symbol may occur.
+ * NOTE(review): presumably Option is written "?" and Repetition is
+ * written "*" in the grammar; the parser is not visible here.
+ *)
+type modifier =
+ Exact
+ | Option
+ | Repetition
+;;
+
+
+(* A symbol together with its modifier *)
+type pattern =
+ { pat_symbol : symbol;
+ pat_modifier : modifier;
+ }
+
+
+(* One alternative of a rule.
+ *
+ * Code is stored as a triple (code, line, column): the text and the
+ * position where it begins in the input file. The generator emits nothing
+ * for a triple whose text is the empty string.
+ *
+ * branch_selector is the first symbol of the branch; branch_pattern
+ * contains the symbols that follow it.
+ * NOTE(review): branch_early_code and branch_binding_code presumably hold
+ * the "$ {{ }}" code before and after the first symbol, respectively;
+ * branch_error_code is the optional code brace executed if the branch
+ * check fails.
+ *)
+type branch =
+ { branch_selector : symbol;
+ branch_early_code : (string * int * int);
+ branch_binding_code : (string * int * int);
+ branch_pattern : pattern list;
+ branch_result_code : (string * int * int);
+ branch_error_code : (string * int * int) option;
+ }
+;;
+
+(* A grammar rule: its name, the names of its formal arguments, and its
+ * branches in the order in which they are tried.
+ *)
+type rule =
+ { rule_name : string;
+ rule_arguments : string list; (* List of names *)
+ rule_branches : branch list;
+ }
+;;
+
+(* The declarations and rules of a whole input file *)
+type text =
+ { text_decls : declaration list;
+ text_rules : rule list;
+ }
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.2 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/generator.ml b/helm/DEVEL/pxp/pxp/m2parsergen/generator.ml
new file mode 100644
index 000000000..4301f2259
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/generator.ml
@@ -0,0 +1,920 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Parser
+open Ast
+
+(* Overall scheme:
+ *
+ * The rules are translated to:
+ *
+ * let rec parse_<rule1> ... = ...
+ * and parse_<rule2> ... = ...
+ * and ...
+ * and parse_<ruleN> ... = ...
+ * in
+ *
+ * Every rule has at least two arguments: 'current' and 'get_next'.
+ * 'current()' is the token that should match the first symbol of the
+ * rule. 'get_next()' returns the next token.
+ *
+ * The rules may have further user arguments; these are the next arguments
+ * in turn.
+ *
+ * The rules return the user value. After they have returned to the caller
+ * the current token is the token that follows the sequence of tokens
+ * matching the rule.
+ *
+ * The rules will raise:
+ * - Not_found if the first token does not match
+ * - Parsing.Parse_error if the rest does not match.
+ *
+ * Rule scheme:
+ *
+ * rule(arg1,arg2,...):
+ * (l1:x1)
+ * {{ let-CODE }}
+ * (l2:y2(name1,...)) y3 ...
+ * {{ CODE }}
+ * ? {{ ?-CODE }}
+ * | x2 ...
+ * | ...
+ * | xN
+ *
+ * let parse_<rule> current get_next arg1 arg2 ... =
+ * match current() with
+ * S(x1) -> ...
+ * | S(x2) -> ...
+ * | ...
+ * | S(xN) -> ...
+ * | _ -> raise Not_found
+ *
+ * Here, S(xi) denotes the set of tokens matched by xi without all tokens
+ * already matched by x1 to x(i-1). (If S(xi) = empty, a warning is printed,
+ * and this branch of the rule is omitted.)
+ *
+ * S(xi) may be a set because xi may be a reference to another rule. In this
+ * case, S(xi) bases on the set of tokens that match the first symbol of
+ * the other rule. (In general, S(xi) must be computed recursively.)
+ *
+ * If the "?" clause is present, every branch is embraced by the following:
+ *
+ * let position = ref "" in
+ * ( try ...
+ * with Parsing.Parse_error -> ( <<?-CODE>> )
+ * )
+ *
+ * Next: The "..." is
+ *
+ * OPTIONAL: let <l1> = parse_<x1> ... in
+ * <<let-CODE>>
+ * M(y1)
+ * M(y2)
+ * ...
+ * M(yN)
+ * <<CODE>>
+ *
+ * If x1 is a rule invocation, it is now parsed, and the result is bound
+ * to a variable.
+ *
+ * Note: After x1 has matched, the Caml variable must be either
+ * bound to the result of the sub parsing, or to the value associated
+ * with the token (if any). The latter is already done in the main
+ * "match" statement, i.e. "match ... with S(x1) -> ..." is actually
+ * "match ... with Token1 -> ...".
+ *
+ * Note: After calling parse_ the exception Not_found is NEVER
+ * converted to Parsing.Parse_error. It is simply not possible that this
+ * happens.
+
+ * For every remaining symbol yi of the rule, a matching statement M(yi)
+ * is produced. These statements have the form:
+ *
+ * OPTIONAL: position := "";
+ * CASE: yi is a token without associated value
+ * let yy_i = get_next() OR current() in
+ * if yy_i <> Token(yi) then raise Parsing.Parse_error;
+ * CASE: yi is a token with value
+ * let yy_i = get_next() OR current() in
+ * let <li> = match yy_i with Token x -> x | _ -> raise Parsing.Parse_error
+ * in
+ * CASE: yi is a rule invocation
+ * OPTIONAL: let _ = get_next() in
+ * let <li> = try parse_<yi> ...
+ * with Not_found -> raise Parsing.Parse_error in
+ *
+ * yy_i is get_next() if y(i-1) was a token, and yy_i is current() if
+ * y(i-1) was a rule invocation.
+ *
+ * Repetitions:
+ *
+ * If yi = (yi')*:
+ *
+ * CASE no label given:
+ *
+ * ( try
+ * while true do
+ * M(yi') with the modification that top-level mismatches raise
+ * Not_found instead of Parsing.Parse_error
+ * done
+ * with Not_found -> ()
+ * )
+ *
+ * CASE a label is given: The list of results must be bound to <li>!
+ *
+ * let yy_list = ref [] in
+ * ( try
+ * while true do
+ * let yy_first = M(yi') (with some modifications) in
+ * yy_list := yy_first :: !yy_list;
+ * done
+ * with Not_found -> ()
+ * );
+ * let <li> = List.rev !yy_list in
+ *
+ * Note that this scheme minimizes stack and heap allocations.
+ *
+ * Options:
+ *
+ * If yi = (yi')?:
+ *
+ * CASE no label given:
+ *
+ * ( try
+ * M(yi') with the modification that top-level mismatches raise
+ * Not_found instead of Parsing.Parse_error
+ * with Not_found -> ()
+ * )
+ *
+ * CASE a label is given: The optional result must be bound to <li>!
+ *
+ * let <li> =
+ * try
+ * Some( M(yi') (with some modifications) )
+ * with Not_found -> None
+ * );
+ *)
+
+
+let lookup_rule tree name =
+  (* Returns the first rule of tree.text_rules whose rule_name is name.
+   * Raises Failure if there is no such rule.
+   *)
+  let rec search rules =
+    match rules with
+      [] ->
+        failwith ("Rule `" ^ name ^ "' not found")
+    | r :: rest ->
+        if r.rule_name = name then r else search rest
+  in
+  search tree.text_rules
+;;
+
+
+let is_typed tree name =
+  (* Finds out whether the token 'name' is typed or not: The first
+   * declaration in tree.text_decls that has this name decides
+   * (D_typed_token: true, D_token: false).
+   * Raises Failure if the token is not declared at all.
+   *)
+  let rec search decls =
+    match decls with
+      [] ->
+        failwith ("Token `" ^ name ^ "' not found")
+    | D_token n :: rest ->
+        if n = name then false else search rest
+    | D_typed_token n :: rest ->
+        if n = name then true else search rest
+  in
+  search tree.text_decls
+;;
+
+
+let label_of_symbol tree sym =
+  (* Returns the optional label of the symbol, whatever kind of symbol it
+   * is. The argument 'tree' is not inspected: labels of untyped tokens
+   * are returned, too (the alternative would be
+   * "if is_typed tree tok then lab else None" for U_symbol).
+   *)
+  match sym with
+    U_symbol (_, label)
+  | L_symbol (_, _, label)
+  | L_indirect (_, _, label) -> label
+;;
+
+
+let is_untyped_U_symbol tree sym =
+  (* true iff the symbol is a token (U_symbol) which is declared as being
+   * untyped. Rule invocations are never untyped tokens.
+   * Raises Failure (by is_typed) if the token is not declared.
+   *)
+  let declared_untyped tok = not(is_typed tree tok) in
+  match sym with
+    U_symbol (tok, _) -> declared_untyped tok
+  | L_symbol _
+  | L_indirect _ -> false
+;;
+
+
+
+let set_of_list l =
+  (* Removes duplicate members of l. From several equal members the last
+   * one is kept; the order of the remaining members is unchanged.
+   *)
+  let add_if_new x tail_set =
+    (* tail_set has the same members as the part of l behind x *)
+    if List.mem x tail_set then tail_set else x :: tail_set
+  in
+  List.fold_right add_if_new l []
+;;
+
+
+let selector_set_of_rule tree name =
+  (* Determines the set of tokens that match the first symbol of the rule
+   * 'name'. If a branch begins with the invocation of another rule, the
+   * tokens selecting that rule are included (recursively).
+   *
+   * Raises Failure if a visited rule does not exist, or if a branch begins
+   * with an indirect call.
+   *)
+
+  let rec first_tokens seen rule_id =
+    if List.mem rule_id seen then
+      (* This rule is already being expanded: it contributes nothing, and
+       * cyclic references do not lead to endless recursion.
+       *)
+      []
+    else begin
+      let r = lookup_rule tree rule_id in
+      let seen' = rule_id :: seen in
+      let tokens_of_branch br =
+        match br.branch_selector with
+          U_symbol (tok, _) ->
+            [ tok ]
+        | L_symbol (callee, _, _) ->
+            first_tokens seen' callee
+        | L_indirect (_, _, _) ->
+            failwith("The first symbol in rule `" ^ rule_id ^
+                     "' is an indirect call; this is not allowed")
+      in
+      List.concat (List.map tokens_of_branch r.rule_branches)
+    end
+  in
+  set_of_list (first_tokens [] name)
+;;
+
+
+let output_code_location b file_name (_, line, column) =
+  (* Appends to the buffer b: a newline, a line directive
+   *   # <line> "<file_name>"
+   * on a line of its own, and then 'column' spaces, such that text
+   * appended afterwards begins in that column. The first component of the
+   * triple (the code itself) is not used.
+   *)
+  List.iter
+    (Buffer.add_string b)
+    [ "\n"; "# "; string_of_int line; " \""; file_name; "\"\n" ];
+  Buffer.add_string b (String.make column ' ')
+;;
+
+
+(* The line number written into the line directive that follows every
+ * emitted piece of user code. It begins with 100000 and is increased by
+ * 10000 for every piece.
+ *)
+let phantasy_line = ref 100000;;
+
+let output_code b file_name ((code, _, _) as triple) =
+  (* Appends a piece of user code to the buffer b, unless the code is the
+   * empty string (then nothing happens at all).
+   * The code is preceded by the line directive for its position in
+   * file_name (see output_code_location), and followed by a line
+   * directive with the current phantasy line and an empty file name.
+   *)
+  match code with
+    "" -> ()
+  | text ->
+      output_code_location b file_name triple;
+      Buffer.add_string b text;
+      let fake_line = !phantasy_line in
+      Buffer.add_string b ("\n# " ^ string_of_int fake_line ^ " \"\"\n");
+      phantasy_line := fake_line + 10000
+;;
+
+
+(* process_branch b file_name tree branch:
+ * Appends to the buffer b the generated OCaml expression for one branch of
+ * a rule. In order, the emitted text consists of: the branch's early code,
+ * the invocation of the selector rule (only if the selector is a rule
+ * call), the binding code, one piece of code per pattern of the branch,
+ * and finally "let result = CODE in ... result".
+ * If the branch has an error handler (branch_error_code), the whole thing
+ * is wrapped in "try ... with Parsing.Parse_error -> HANDLER" and a
+ * reference yy_position is declared and kept up to date with the label of
+ * the symbol currently being parsed.
+ * Fails (failwith) if a rule is called with the wrong number of arguments
+ * or if the selector of the branch is an indirect call.
+ *)
+let process_branch b file_name tree branch =
+
+ let make_rule_invocation called_rule args lab allow_not_found =
+ (* Produces: let LAB = parse_CALLED_RULE yy_current yy_get_next ARGS in
+ * where LAB is the label, or "_" if there is no label.
+ * If not allow_not_found, the exception Not_found is caught and
+ * changed into Parsing.Parse_error.
+ *)
+ let r = lookup_rule tree called_rule in
+ if List.length r.rule_arguments <> List.length args then
+ failwith("Calling rule `" ^ called_rule ^ "' with the wrong number of arguments!");
+
+ Buffer.add_string b "let ";
+ begin match lab with
+ None -> Buffer.add_string b "_"
+ | Some l -> Buffer.add_string b l
+ end;
+ Buffer.add_string b " = ";
+ if not allow_not_found then
+ Buffer.add_string b "try ";
+ Buffer.add_string b "parse_";
+ Buffer.add_string b called_rule;
+ Buffer.add_string b " yy_current yy_get_next";
+ List.iter
+ (fun a -> Buffer.add_string b " ";
+ Buffer.add_string b a;
+ )
+ args;
+ if not allow_not_found then
+ Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
+ Buffer.add_string b " in\n"
+ in
+
+ let make_indirect_rule_invocation ml_name args lab allow_not_found =
+ (* Produces: let LAB = ML_NAME yy_current yy_get_next ARGS in
+ * where LAB is the label, or "_" if there is no label. Unlike
+ * make_rule_invocation, the callee is not looked up in the tree and
+ * the number of arguments is not checked.
+ * If not allow_not_found, the exception Not_found is caught and
+ * changed into Parsing.Parse_error.
+ *)
+ Buffer.add_string b "let ";
+ begin match lab with
+ None -> Buffer.add_string b "_"
+ | Some l -> Buffer.add_string b l
+ end;
+ Buffer.add_string b " = ";
+ if not allow_not_found then
+ Buffer.add_string b "try ";
+ Buffer.add_string b ml_name;
+ Buffer.add_string b " yy_current yy_get_next";
+ List.iter
+ (fun a -> Buffer.add_string b " ";
+ Buffer.add_string b a;
+ )
+ args;
+ if not allow_not_found then
+ Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
+ Buffer.add_string b " in\n"
+ in
+
+ let process_symbol sym previous_was_token allow_not_found =
+ (* Emits the code that recognizes exactly one occurrence of sym.
+ * previous_was_token: the previously matched token has not yet been
+ * consumed, so the emitted code must fetch the next token with
+ * yy_get_next() instead of looking at yy_current().
+ * allow_not_found: a mismatch raises Not_found in the generated code;
+ * otherwise it raises Parsing.Parse_error.
+ *)
+ match sym with
+ U_symbol(tok, lab) ->
+ (* Distinguish between simple tokens and typed tokens *)
+ if is_typed tree tok then begin
+ (* Typed token *)
+ Buffer.add_string b "let ";
+ begin match lab with
+ None -> Buffer.add_string b "_"
+ | Some l -> Buffer.add_string b l
+ end;
+ Buffer.add_string b " = match ";
+ if previous_was_token then
+ Buffer.add_string b "yy_get_next()"
+ else
+ Buffer.add_string b "yy_current()";
+ Buffer.add_string b " with ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " x -> x | _ -> raise ";
+ if allow_not_found then
+ Buffer.add_string b "Not_found"
+ else
+ Buffer.add_string b "Parsing.Parse_error";
+ Buffer.add_string b " in\n";
+ end
+ else begin
+ (* Simple token *)
+ Buffer.add_string b "if (";
+ if previous_was_token then
+ Buffer.add_string b "yy_get_next()"
+ else
+ Buffer.add_string b "yy_current()";
+ Buffer.add_string b ") <> ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " then raise ";
+ if allow_not_found then
+ Buffer.add_string b "Not_found;\n"
+ else
+ Buffer.add_string b "Parsing.Parse_error;\n"
+ end
+ | L_symbol(called_rule, args, lab) ->
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ make_rule_invocation called_rule args lab allow_not_found
+ | L_indirect(ml_name, args, lab) ->
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ make_indirect_rule_invocation ml_name args lab allow_not_found
+ in
+
+ let process_pattern (current_position, previous_was_token) pat =
+ (* Fold step over the patterns of the branch. The accumulator is the
+ * pair (label last written to yy_position, whether the last matched
+ * token is still unconsumed); the updated pair is returned.
+ *)
+ (* Assign "position" if necessary. *)
+ let new_position =
+ if branch.branch_error_code <> None then begin
+ match pat.pat_symbol with
+ U_symbol(_,Some l) -> l
+ | L_symbol(_,_,Some l) -> l
+ | L_indirect(_,_,Some l) -> l
+ | _ -> ""
+ end
+ else ""
+ in
+ if new_position <> current_position then begin
+ Buffer.add_string b "yy_position := \"";
+ Buffer.add_string b new_position;
+ Buffer.add_string b "\";\n";
+ end;
+
+ (* Only an Exact token pattern leaves its token unconsumed; the code
+ * emitted for the other cases advances past what it matched.
+ *)
+ let this_is_token =
+ match pat.pat_symbol with
+ U_symbol(_,_) -> pat.pat_modifier = Exact
+ | L_symbol(_,_,_) -> false
+ | L_indirect(_,_,_) -> false
+ in
+
+ (* First distinguish between Exact, Option, and Repetition: *)
+ begin match pat.pat_modifier with
+ Exact ->
+ process_symbol pat.pat_symbol previous_was_token false
+ | Option ->
+ begin match label_of_symbol tree pat.pat_symbol with
+ None ->
+ (* CASE: optional symbol without label *)
+ (* OPTIMIZATION: If the symbol is
+ * a token, the loop becomes very simple.
+ *)
+ if (match pat.pat_symbol with
+ U_symbol(t,_) -> not (is_typed tree t) | _ -> false)
+ then begin
+ let tok = match pat.pat_symbol with
+ U_symbol(t,_) -> t | _ -> assert false in
+ (* Optimized case *)
+ Buffer.add_string b "if ";
+ if previous_was_token then
+ Buffer.add_string b "yy_get_next()"
+ else
+ Buffer.add_string b "yy_current()";
+ Buffer.add_string b " = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " then ignore(yy_get_next());\n";
+ end
+ else begin
+ (* General, non-optimized case: *)
+ (* NOTE(review): the yy_get_next call below is emitted for every
+ * kind of symbol, whereas the labelled "value" subcase and the
+ * Repetition cases emit it only for U_symbol. For an unlabelled
+ * optional rule invocation this looks like it skips one token
+ * too many -- confirm whether this is intended.
+ *)
+ Buffer.add_string b "( try (";
+ process_symbol pat.pat_symbol previous_was_token true;
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b ") with Not_found -> ());\n";
+ end
+ | Some l ->
+ (* CASE: optional symbol with label *)
+ if is_untyped_U_symbol tree pat.pat_symbol then begin
+ (* SUBCASE: The label becomes a boolean variable *)
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = try (";
+ process_symbol pat.pat_symbol previous_was_token true;
+ Buffer.add_string b ");\n";
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "true with Not_found -> false in\n";
+ end
+ else begin
+ (* SUBCASE: the symbol has a value *)
+ (* The label becomes an option: Some value if the symbol was
+ * found, None if Not_found was raised.
+ *)
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = try let yy_tok = Some(";
+ process_symbol pat.pat_symbol previous_was_token true;
+ Buffer.add_string b l;
+ Buffer.add_string b ") in\n";
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+
+ Buffer.add_string b "yy_tok with Not_found -> None in\n";
+ end
+ end
+ | Repetition ->
+ begin match label_of_symbol tree pat.pat_symbol with
+ None ->
+ (* CASE: repeated symbol without label *)
+ (* OPTIMIZATION: If the symbol is
+ * a token, the loop becomes very simple.
+ *)
+ if (match pat.pat_symbol with
+ U_symbol(t,_) -> not (is_typed tree t) | _ -> false)
+ then begin
+ let tok = match pat.pat_symbol with
+ U_symbol(t,_) -> t | _ -> assert false in
+ if previous_was_token then begin
+ (* Optimized case I *)
+ Buffer.add_string b "while yy_get_next() = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " do () done;\n";
+ end
+ else begin
+ (* Optimized case II *)
+ Buffer.add_string b "if yy_current() = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " then (";
+ Buffer.add_string b "while yy_get_next() = ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " do () done);\n";
+ end
+ end
+ else begin
+ (* General, non-optimized case: *)
+ (* The pending token is consumed once before the loop, so the
+ * loop body is generated with previous_was_token = false.
+ *)
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "( try while true do (";
+ process_symbol pat.pat_symbol false true;
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n"
+ else
+ Buffer.add_string b "();\n";
+
+ Buffer.add_string b ") done with Not_found -> ());\n";
+ end
+ | Some l ->
+ (* CASE: repeated symbol with label *)
+ if is_untyped_U_symbol tree pat.pat_symbol then begin
+ (* SUBCASE: The label becomes an integer variable *)
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "let yy_counter = ref 0 in\n";
+ Buffer.add_string b "( try while true do \n";
+ process_symbol pat.pat_symbol false true;
+ Buffer.add_string b "incr yy_counter;\n";
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+
+ Buffer.add_string b "done with Not_found -> ());\n";
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = !yy_counter in\n";
+ end
+ else begin
+ (* SUBCASE: the symbol has a value *)
+ (* The values are collected in reverse order in yy_list; the
+ * label is finally bound to the list in order of occurrence.
+ *)
+ if previous_was_token then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+ Buffer.add_string b "let yy_list = ref [] in\n";
+ Buffer.add_string b "( try while true do \n";
+ process_symbol pat.pat_symbol false true;
+ Buffer.add_string b "yy_list := ";
+ Buffer.add_string b l;
+ Buffer.add_string b " :: !yy_list;\n";
+
+ if (match pat.pat_symbol with
+ U_symbol(_,_) -> true | _ -> false) then
+ Buffer.add_string b "ignore(yy_get_next());\n";
+
+ Buffer.add_string b "done with Not_found -> ());\n";
+ Buffer.add_string b "let ";
+ Buffer.add_string b l;
+ Buffer.add_string b " = List.rev !yy_list in\n";
+ end
+ end
+ end;
+
+ (* Continue: *)
+ (new_position, this_is_token)
+ in
+
+
+ let process_inner_branch current_position =
+ (* Emits the body of the branch (everything except the optional
+ * surrounding error handler). current_position is the label that has
+ * already been stored in yy_position, or "".
+ *)
+ (* If there is "early code", run this now: *)
+ output_code b file_name branch.branch_early_code;
+ Buffer.add_string b "\n";
+
+ (* If the first symbol is a rule invocation, call the corresponding
+ * parser function now.
+ *)
+ let previous_was_token =
+ begin match branch.branch_selector with
+ U_symbol(_,_) ->
+ true
+ | L_symbol(called_rule, args, lab) ->
+ make_rule_invocation called_rule args lab true;
+ false
+ | L_indirect(_,_,_) ->
+ failwith("The first symbol in some rule is an indirect call; this is not allowed")
+ end
+ in
+
+ (* Now output the "let-CODE". *)
+ output_code b file_name branch.branch_binding_code;
+ Buffer.add_string b "\n";
+
+ (* Process the other symbols in turn: *)
+ let (_, previous_was_token') =
+ (List.fold_left
+ process_pattern
+ (current_position, previous_was_token)
+ branch.branch_pattern
+ )
+ in
+
+ (* Special case:
+ *
+ * If previous_was_token', we must invoke yy_get_next one more time.
+ * This is deferred until "CODE" is executed to give this code
+ * the chance to make the next token available (in XML, the next token
+ * might come from a different entity, and "CODE" must switch to this
+ * entity).
+ *)
+
+ (* Now output "CODE": *)
+ Buffer.add_string b "let result = \n";
+ output_code b file_name branch.branch_result_code;
+ Buffer.add_string b "\nin\n";
+
+ if previous_was_token' then
+ Buffer.add_string b "ignore(yy_get_next());\nresult\n"
+ else
+ Buffer.add_string b "result\n"
+ in
+
+ (* If we have a ? clause, generate now the "try" statement *)
+ match branch.branch_error_code with
+ None ->
+ Buffer.add_string b "( ";
+ process_inner_branch "";
+ Buffer.add_string b " )";
+ | Some code ->
+
+ (* let position = ref "<label>" in *)
+ (* yy_position is initialized with the label of the selector if the
+ * selector is a labelled rule invocation, and with "" otherwise.
+ *)
+
+ Buffer.add_string b "let yy_position = ref \"";
+ let current_position =
+ match branch.branch_selector with
+ U_symbol(_,_) -> ""
+ | L_symbol(_,_,None) -> ""
+ | L_symbol(_,_,Some l) -> l
+ | L_indirect(_,_,None) -> ""
+ | L_indirect(_,_,Some l) -> l
+ in
+ Buffer.add_string b current_position;
+ Buffer.add_string b "\" in\n";
+
+ (* The "try" statement: *)
+
+ Buffer.add_string b "( try (\n";
+
+ process_inner_branch current_position;
+
+ Buffer.add_string b "\n) with Parsing.Parse_error -> (\n";
+ output_code b file_name code;
+ Buffer.add_string b "\n))\n"
+;;
+
+
+(* process b file_name tree:
+ * Appends to the buffer b one parser function per rule of tree.text_rules.
+ * The functions form a single "let rec parse_R1 ... and parse_R2 ..."
+ * group which is terminated by " in", so the text emitted afterwards
+ * becomes the body of that let.
+ * Every function has the shape
+ *   parse_RULE yy_current yy_get_next ARGS = match yy_current() with ...
+ * with one match case per branch and a final "| _ -> raise Not_found".
+ * A warning is printed on stderr for every token that can never select a
+ * branch because an earlier branch of the same rule already matches it.
+ * Fails (failwith) if the selector of a branch is an indirect call.
+ *)
+let process b file_name tree =
+ (* Iterate over the rules and output the parser functions: *)
+ let is_first = ref true in
+ List.iter
+ (fun r ->
+
+ (* Generate the function header: *)
+
+ if !is_first then
+ Buffer.add_string b "let rec "
+ else
+ Buffer.add_string b "and ";
+ is_first := false;
+ Buffer.add_string b "parse_";
+ Buffer.add_string b r.rule_name;
+ Buffer.add_string b " yy_current yy_get_next";
+ List.iter
+ (fun arg -> Buffer.add_string b " ";
+ Buffer.add_string b arg)
+ r.rule_arguments;
+ Buffer.add_string b " =\n";
+
+ (* Generate the "match" statement: *)
+
+ Buffer.add_string b "match yy_current() with\n";
+ let s_done = ref [] in
+ (* s_done: The set of already matched tokens *)
+
+ List.iter
+ (fun branch ->
+ match branch.branch_selector with
+ U_symbol(tok, lab) ->
+ (* A simple token *)
+ (* The three sub-cases below differ only in the emitted pattern:
+ * "TOK _" (typed, no label), "TOK label" (typed, labelled) and
+ * "TOK" (untyped).
+ *)
+ if List.mem tok !s_done then begin
+ prerr_endline("WARNING: In rule `" ^ r.rule_name ^
+ "': Match for token `" ^
+ tok ^ "' hidden by previous match");
+ end
+ else
+ if is_typed tree tok then begin
+ match lab with
+ None ->
+ Buffer.add_string b "| ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " _ -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := tok :: !s_done;
+ | Some l ->
+ Buffer.add_string b "| ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " ";
+ Buffer.add_string b l;
+ Buffer.add_string b " -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := tok :: !s_done;
+ end
+ else begin
+ Buffer.add_string b "| ";
+ Buffer.add_string b tok;
+ Buffer.add_string b " -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := tok :: !s_done;
+ end
+ | L_symbol(called_rule, args, lab) ->
+ (* An invocation of a rule *)
+ (* The branch is selected by every token of the selector set of
+ * the called rule that has not been matched by an earlier
+ * branch; these tokens are emitted as one or-pattern. If no
+ * such token remains, no case is emitted for the branch.
+ *)
+ let s_rule = selector_set_of_rule tree called_rule in
+ let s_rule' =
+ List.filter
+ (fun tok ->
+ if List.mem tok !s_done then begin
+ prerr_endline("WARNING: In rule `" ^ r.rule_name ^
+ "': Match for token `" ^
+ tok ^ "' hidden by previous match");
+ false
+ end
+ else true)
+ s_rule in
+ if s_rule' <> [] then begin
+ Buffer.add_string b "| ( ";
+ let is_first = ref true in
+ List.iter
+ (fun tok ->
+ if not !is_first then
+ Buffer.add_string b " | ";
+ is_first := false;
+ Buffer.add_string b tok;
+ if is_typed tree tok then
+ Buffer.add_string b " _";
+ )
+ s_rule';
+ Buffer.add_string b ") -> ";
+ process_branch b file_name tree branch;
+ Buffer.add_string b "\n";
+ s_done := s_rule' @ !s_done;
+ end
+ | L_indirect(ml_name, args, lab) ->
+ (* An invocation of an indirect rule *)
+ failwith("The first symbol in rule `" ^ r.rule_name ^
+ "' is an indirect call; this is not allowed")
+ )
+ r.rule_branches;
+
+ Buffer.add_string b "\n| _ -> raise Not_found\n";
+ )
+ tree.text_rules;
+
+ Buffer.add_string b " in\n"
+;;
+
+
+(* count_lines s:
+ * Returns the pair (n, c) where n is the number of line separators in s
+ * and c is the number of characters following the last separator (the
+ * length of s if there is none). CR, LF and the pair CR LF each count as
+ * one separator.
+ *)
+let count_lines s =
+  let l = String.length s in
+
+  (* Position of the first occurrence of c at or after k, or -1 *)
+  let index_from k c =
+    try String.index_from s k c with Not_found -> (-1) in
+
+  (* n: separators seen so far; k: position where scanning continues;
+   * no_cr / no_lf: it is already known that no further CR / LF exists,
+   * so the corresponding search can be skipped.
+   *)
+  let rec count n k no_cr no_lf =
+    let next_cr = if no_cr then (-1) else index_from k '\013' in
+    let next_lf = if no_lf then (-1) else index_from k '\010' in
+    if next_cr >= 0 && (next_lf < 0 || next_cr < next_lf) then begin
+      (* A CR comes first; a directly following LF belongs to it *)
+      if next_cr+1 < l && s.[next_cr+1] = '\010' then
+        count (n+1) (next_cr+2) false (next_lf < 0)
+      else
+        count (n+1) (next_cr+1) false (next_lf < 0)
+    end
+    else if next_lf >= 0 then begin
+      count (n+1) (next_lf+1) (next_cr < 0) false
+    end
+    else
+      n, (l - k)
+
+  in
+  count 0 0 false false
+;;
+
+
+(* Position bookkeeping for the scanners below. Each time a lexeme has been
+ * scanned, the position before that lexeme is saved in old_line/old_column
+ * and line/column are moved behind it. Error messages report the "old"
+ * position. parse_and_generate starts with line = 1 and column = 0.
+ *)
+type scan_context =
+ { mutable old_line : int;
+ mutable old_column : int;
+ mutable line : int;
+ mutable column : int;
+ }
+;;
+
+
+(* next_token context lexbuf:
+ * Token source for the grammar parser. Scans one token with
+ * Lexer.scan_file, updates the position stored in context and returns the
+ * token, with these exceptions: Space tokens are skipped; a Code token is
+ * returned with the line where it starts and the start column plus 2; Eof
+ * is turned into a failure ("Unexpected end of file").
+ *)
+let rec next_token context lexbuf =
+  let tok = Lexer.scan_file lexbuf in
+  let start_line = context.line
+  and start_column = context.column in
+  context.old_line <- start_line;
+  context.old_column <- start_column;
+  (* Move the current position behind the lexeme just scanned *)
+  begin match count_lines (Lexing.lexeme lexbuf) with
+    | (0, width) ->
+        context.column <- start_column + width
+    | (breaks, width) ->
+        context.line <- start_line + breaks;
+        context.column <- width
+  end;
+  match tok with
+    | Space -> next_token context lexbuf
+    | Code(text,_,_) -> Code(text, start_line, start_column + 2)
+    | Eof -> failwith "Unexpected end of file"
+    | _ -> tok
+;;
+
+
+(* parse_and_generate ch:
+ * Reads a grammar file from the channel ch and prints the generated
+ * program on stdout. The input consists of a header (copied verbatim up
+ * to the first separator), the declarations and rules (parsed with
+ * Parser.text and translated by process), and a trailer (copied verbatim
+ * up to the end of the input).
+ * Any exception is reported on stderr together with the position of the
+ * lexeme scanned last, and the program exits with code 1.
+ *)
+let parse_and_generate ch =
+  let b = Buffer.create 20000 in
+
+  (* Save the position before the lexeme just scanned as the "old"
+   * position and move the current position behind the lexeme.
+   *)
+  let advance context lexbuf =
+    let line = context.line in
+    let column = context.column in
+    context.old_line <- line;
+    context.old_column <- column;
+    let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+    if n_lines > 0 then begin
+      context.line <- line + n_lines;
+      context.column <- n_columns;
+    end
+    else
+      context.column <- column + n_columns
+  in
+
+  (* Copy the header into b; stop behind the first separator *)
+  let rec find_sep context lexbuf =
+    let t = Lexer.scan_header lexbuf in
+    advance context lexbuf;
+    match t with
+      Code(s,_,_) ->
+        Buffer.add_string b s;
+        find_sep context lexbuf
+    | Eof -> failwith "Unexpected end of file"
+    | Separator -> ()
+    | _ -> assert false
+  in
+
+  (* Copy the trailer into b up to the end of the input *)
+  let rec find_rest context lexbuf =
+    let t = Lexer.scan_header lexbuf in
+    advance context lexbuf;
+    match t with
+      Code(s,_,_) ->
+        Buffer.add_string b s;
+        find_rest context lexbuf
+    | Separator ->
+        (* The trailer is scanned with scan_header, which still reports
+         * "%%" as Separator. There is nothing left to separate here, so
+         * the two characters are ordinary trailer text (e.g. part of a
+         * format string) and are copied like everything else.
+         *)
+        Buffer.add_string b "%%";
+        find_rest context lexbuf
+    | Eof -> ()
+    | _ -> assert false
+  in
+
+  (* First read until '%%' *)
+  let lexbuf = Lexing.from_channel ch in
+  let context = { old_line = 0; old_column = 0; line = 1; column = 0 } in
+  let file_name = "stdin" in
+  try
+    output_code_location b file_name ("", 1, 0);
+    find_sep context lexbuf;
+    (* Parse the following text *)
+    let text = (Parser.text (next_token context) lexbuf : Ast.text) in
+    (* Process it: *)
+    process b file_name text;
+    (* Read rest *)
+    output_code_location b file_name ("", context.line, context.column);
+    find_rest context lexbuf;
+    (* Output everything: *)
+    print_string (Buffer.contents b)
+  with
+    any ->
+      Printf.eprintf
+        "Error at line %d column %d: %s\n"
+        context.old_line
+        context.old_column
+        (Printexc.to_string any);
+      exit 1
+;;
+
+
+(* Program entry point: translate the grammar read from stdin. The exit
+ * code is 0 on success; parse_and_generate itself exits with code 1 if an
+ * error occurs.
+ *)
+parse_and_generate stdin;;
+exit 0;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/17 00:33:02 gerd
+ * Bugfix: tok* and tok? work now if tok is an untyped token
+ * without label.
+ *
+ * Revision 1.6 2000/05/14 20:59:24 gerd
+ * Added "phantasy line numbers" to help find erroneous locations.
+ *
+ * Revision 1.5 2000/05/14 20:41:58 gerd
+ * x: Token? means: if Token is detected x=true else x=false.
+ * x: Token* means: x becomes the number of occurrences of Token.
+ *
+ * Revision 1.4 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2 2000/05/06 21:51:08 gerd
+ * Numerous bugfixes.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/lexer.mll b/helm/DEVEL/pxp/pxp/m2parsergen/lexer.mll
new file mode 100644
index 000000000..a016897b2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/lexer.mll
@@ -0,0 +1,93 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+{
+ open Parser
+}
+
+(* scan_file: tokenizer for the declarations and the rules of a grammar
+ * file. White space and comments are both returned as Space; the end of
+ * the input is returned as Eof.
+ *)
+rule scan_file = parse
+ (* A comment in C notation, possibly spanning several lines *)
+ "/*" [^ '*']* ('*'+ [^ '/' '*'] [^ '*']* )* '*'* "*/"
+ { Space }
+ | "%token"
+ { Token }
+ (* The type marker of a typed token: angle brackets containing nothing
+  * but white space
+  *)
+ | "<" [' ' '\t' '\r' '\n']* ">"
+ { Type
+ }
+ (* Names beginning with a lowercase letter *)
+ | [ 'a'-'z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
+ { let s = Lexing.lexeme lexbuf in
+ Lname s
+ }
+ (* Names beginning with an uppercase letter *)
+ | [ 'A'-'Z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
+ { let s = Lexing.lexeme lexbuf in
+ Uname s
+ }
+ | "%%"
+ { Separator }
+ | "("
+ { Lparen }
+ | ","
+ { Comma }
+ | ")"
+ { Rparen }
+ | "["
+ { Lbracket }
+ | "]"
+ { Rbracket }
+ | ":"
+ { Colon }
+ (* Embedded code in double braces. The token carries the text between
+  * the braces; line and column are set to 0 here.
+  *)
+ | "{{" [^ '}']* ( '}' [^ '}']+ )* "}}"
+ { let s = Lexing.lexeme lexbuf in
+ Code (String.sub s 2 (String.length s - 4), 0, 0)
+ }
+ | "?"
+ { Error }
+ | "|"
+ { Alt }
+ | "+"
+ { Loop_plus }
+ | "*"
+ { Loop_star }
+ | [' ' '\t' '\r' '\n']+
+ { Space }
+ | "$"
+ { Dollar }
+ | eof
+ { Eof }
+
+(* scan_header: scanner for text that is copied verbatim. A double percent
+ * sign is returned as Separator and the end of the input as Eof;
+ * everything else is returned in pieces as Code tokens (a maximal run of
+ * characters other than the percent sign, or a single percent sign), with
+ * line and column set to 0.
+ *)
+and scan_header = parse
+ "%%"
+ { Separator }
+ | "%"
+ { Code("%", 0, 0) }
+ | [^ '%']*
+ { Code(Lexing.lexeme lexbuf, 0, 0) }
+ | eof
+ { Eof }
+
+(* scan_rest: returns all of the remaining input as a single Code token
+ * (line and column set to 0), and Eof at the end of the input.
+ *)
+and scan_rest = parse
+ _*
+ { Code(Lexing.lexeme lexbuf, 0, 0) }
+ | eof
+ { Eof }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.2 2000/05/06 21:51:24 gerd
+ * New symbol Dollar.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/parser.mly b/helm/DEVEL/pxp/pxp/m2parsergen/parser.mly
new file mode 100644
index 000000000..7497c3a93
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/parser.mly
@@ -0,0 +1,194 @@
+/* $Id$
+ * ----------------------------------------------------------------------
+ *
+ */
+
+%{
+ open Ast
+
+%}
+
+/* Tokens produced by the lexer. Lname and Uname carry the name as a
+ * string; Code carries the code text together with a line and a column
+ * number. All other tokens carry no value.
+ */
+%token Space
+%token Token
+%token Type
+%token <string> Lname
+%token <string> Uname
+%token Separator
+%token Lparen
+%token Rparen
+%token Comma
+%token Colon
+%token <string * int * int> Code
+%token Error
+%token Alt
+%token Loop_plus
+%token Loop_star
+%token Dollar
+%token Lbracket
+%token Rbracket
+%token Eof
+
+/* The entry point: a complete grammar text */
+%start text
+%type <Ast.text> text
+
+%%
+
+/* A grammar text: the declarations followed by the rules. Each of the two
+ * parts is terminated by a Separator token.
+ */
+text:
+ declarations rules
+ { { text_decls = $1; text_rules = $2; } }
+
+declarations:
+ declaration declarations
+ { $1 :: $2 }
+| Separator
+ { [] }
+
+/* A token declaration, without or with the type marker */
+declaration:
+ Token Uname
+ { D_token $2 }
+| Token Type Uname
+ { D_typed_token $3 }
+
+rules:
+ rule rules
+ { $1 :: $2 }
+| Separator
+ { [] }
+
+/* A rule: name "(" formal arguments ")" ":" branches.
+ * The closing parenthesis is consumed by formal_arguments.
+ */
+rule:
+ Lname Lparen formal_arguments Colon branches
+ { { rule_name = $1;
+ rule_arguments = $3;
+ rule_branches = $5;
+ }
+ }
+
+/* The formal arguments after the opening parenthesis, up to and including
+ * the closing parenthesis
+ */
+formal_arguments:
+ Rparen
+ { [] }
+| Lname comma_formal_arguments
+ { $1 :: $2 }
+
+comma_formal_arguments:
+ Comma Lname comma_formal_arguments
+ { $2 :: $3 }
+| Rparen
+ { [] }
+
+/* One or more branches, separated by Alt */
+branches:
+ branch alt_branches
+ { $1 :: $2 }
+
+alt_branches:
+ Alt branch alt_branches
+ { $2 :: $3 }
+|
+ { [] }
+
+/* A branch, optionally preceded by Dollar and "early code" */
+branch:
+ simple_branch
+ { $1 }
+| Dollar Code simple_branch
+ { { $3 with branch_early_code = $2 } }
+
+/* selector [ Dollar binding-code ] patterns result-code [ error handler ].
+ * Code parts that are absent are represented by ("",0,0).
+ */
+simple_branch:
+ symbol Dollar Code patterns Code opt_error_handler
+ { { branch_selector = $1;
+ branch_early_code = ("",0,0);
+ branch_binding_code = $3;
+ branch_pattern = $4;
+ branch_result_code = $5;
+ branch_error_code = $6;
+ }
+ }
+| symbol patterns Code opt_error_handler
+ { { branch_selector = $1;
+ branch_early_code = ("",0,0);
+ branch_binding_code = ("", 0, 0);
+ branch_pattern = $2;
+ branch_result_code = $3;
+ branch_error_code = $4;
+ }
+ }
+
+patterns:
+ pattern patterns
+ { $1 :: $2 }
+|
+ { [] }
+
+/* A symbol with an optional modifier: Loop_star means repetition, Error
+ * (the question mark) means option, no modifier means exactly once.
+ */
+pattern:
+ symbol Loop_star
+ { { pat_symbol = $1;
+ pat_modifier = Repetition;
+ }
+ }
+| symbol Error
+ { { pat_symbol = $1;
+ pat_modifier = Option;
+ }
+ }
+| symbol
+ { { pat_symbol = $1;
+ pat_modifier = Exact;
+ }
+ }
+
+/* A symbol is a token (Uname), a rule invocation (Lname with arguments),
+ * or an indirect invocation (Lname in brackets, with arguments). Each
+ * form may be preceded by a label (Lname Colon).
+ */
+symbol:
+ Lname Colon Uname
+ { U_symbol($3, Some $1) }
+| Lname Colon Lname Lparen actual_arguments
+ { L_symbol($3, $5, Some $1) }
+| Lname Colon Lbracket Lname Rbracket Lparen actual_arguments
+ { L_indirect($4, $7, Some $1) }
+| Uname
+ { U_symbol($1, None) }
+| Lname Lparen actual_arguments
+ { L_symbol($1, $3, None) }
+| Lbracket Lname Rbracket Lparen actual_arguments
+ { L_indirect($2, $5, None) }
+
+
+/* The actual arguments after the opening parenthesis, up to and including
+ * the closing parenthesis
+ */
+actual_arguments:
+ Rparen
+ { [] }
+| Lname comma_actual_arguments
+ { $1 :: $2 }
+
+comma_actual_arguments:
+ Rparen
+ { [] }
+| Comma Lname comma_actual_arguments
+ { $2 :: $3 }
+
+/* The optional error handler: the question mark followed by code */
+opt_error_handler:
+ Error Code
+ { Some $2 }
+|
+ { None }
+
+%%
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2 2000/05/06 21:51:46 gerd
+ * New Dollar tag.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/x.m2y b/helm/DEVEL/pxp/pxp/m2parsergen/x.m2y
new file mode 100644
index 000000000..3fa80b070
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/m2parsergen/x.m2y
@@ -0,0 +1,45 @@
+
+(* Sample input for the parser generator: the token type used by the
+ * rules below.
+ *)
+type token =
+ A | B | C of int | EOF
+;;
+
+%%
+
+%token A
+%token B
+%token <> C
+%token EOF
+
+%%
+
+/* NOTE(review): in the rules below the binding code directly follows the
+ * first symbol, whereas the grammar in parser.mly expects a dollar sign
+ * in front of it. This sample may predate that syntax -- confirm.
+ */
+r():
+ one:s()
+ {{ }}
+ b:B
+ two:B?
+ three:s()
+ {{ prerr_endline ("Result: " ^ string_of_int three) }}
+? {{ prerr_endline ("ERROR: " ^ !yy_position) }}
+
+s():
+ A
+ {{ }}
+ {{ prerr_endline "A"; 0 }}
+| B
+ {{ }}
+ {{ prerr_endline "B"; 0 }}
+| n:C
+ {{ }}
+ {{ prerr_endline ("C: " ^ string_of_int n); n }}
+%%
+
+(* Driver: runs parse_r over a fixed list of tokens. "current" returns the
+ * head of the remaining input; "next_token" drops the head and returns
+ * the new head.
+ *)
+let input = ref [ A; B; B; B; C 5; EOF ] in
+let current() = List.hd !input in
+let next_token () =
+ prerr_endline "get_next";
+ input := List.tl !input;
+ List.hd !input
+in
+parse_r current next_token
+;;
+
diff --git a/helm/DEVEL/pxp/pxp/pxp_aux.ml b/helm/DEVEL/pxp/pxp/pxp_aux.ml
new file mode 100644
index 000000000..aa1212b6f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_aux.ml
@@ -0,0 +1,651 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ * Some auxiliary functions
+ *)
+
+(**********************************************************************)
+(* Lexing *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_lexers
+open Netconversion
+
+(* character enc warner k:
+ * Returns the code point k as a string in the encoding enc.
+ * Raises WF_error if k is not accepted: the range 0xd800 to 0xdfff, the
+ * code points 0xfffe and 0xffff, everything above 0x10ffff, and the
+ * control characters below 32 other than 8, 9, 10 and 13.
+ * If makechar raises Not_found for k, a warning is issued through warner
+ * and the empty string is returned.
+ *)
+let character enc warner k =
+  assert (k>=0);
+  if (k >= 0xd800 && k < 0xe000) || (k >= 0xfffe && k <= 0xffff)
+     || k > 0x10ffff
+     || (k < 8) || (k = 11) || (k = 12) || (k >= 14 && k <= 31)
+  then
+    raise (WF_error("Code point " ^ string_of_int k ^
+                    " outside the accepted range of code points"));
+
+  try
+    makechar (enc : rep_encoding :> encoding) k
+  with
+    Not_found ->
+      warner # warn ("Code point cannot be represented in internal encoding: "
+                     ^ string_of_int k);
+      ""
+;;
+
+
+(* check_name warner name:
+ * Issues a warning through warner if the first three characters of name
+ * are "xml" in any combination of upper and lower case.
+ *)
+let check_name warner name =
+  let is_either ch lower upper = ch = lower || ch = upper in
+  if String.length name >= 3
+     && is_either name.[0] 'x' 'X'
+     && is_either name.[1] 'm' 'M'
+     && is_either name.[2] 'l' 'L'
+  then
+    warner # warn ("Name is reserved for future extensions: " ^ name)
+;;
+
+
+(* tokens_of_content_string lexerset s:
+ * Scans s with lexerset.scan_content_string and returns all tokens up to,
+ * but not including, Eof, in the order of their occurrence.
+ *)
+let tokens_of_content_string lexerset s =
+  let lexbuf = Lexing.from_string s in
+  let rec collect rev_tokens =
+    match lexerset.scan_content_string lexbuf with
+      | Eof -> List.rev rev_tokens
+      | tok -> collect (tok :: rev_tokens)
+  in
+  collect []
+;;
+
+
+(* expand_attvalue_with_rec_check lexerset dtd s warner entities norm_crlf:
+ * Returns the attribute value s with all general entity references and
+ * character references expanded.
+ * - entities: the names of the entities currently being expanded; a
+ *   reference to one of them raises WF_error (recursive reference).
+ * - Validation_error is raised if a referenced entity violates the
+ *   standalone declaration, or if its replacement text contains
+ *   references to external entities.
+ * - WF_error is raised if the value contains the character '<' literally.
+ * - norm_crlf: whether CRLF counts as one character (replaced by one
+ *   space) or as two characters (replaced by two spaces).
+ *)
+let rec expand_attvalue_with_rec_check lexerset dtd s warner entities norm_crlf =
+  let toklist = tokens_of_content_string lexerset s in
+  let rec expand tl =
+    match tl with
+      [] -> ""
+    | ERef n :: tl' ->
+        if List.mem n entities then
+          raise(WF_error("Recursive reference to general entity `" ^ n ^ "'"));
+        let en, extdecl = dtd # gen_entity n in
+        if dtd # standalone_declaration && extdecl then
+          raise(Validation_error("Reference to entity `" ^ n ^
+                                 "' violates standalone declaration"));
+        let rtext, rtext_contains_ext_refs = en # replacement_text in
+        if rtext_contains_ext_refs then
+          raise(Validation_error("Found reference to external entity in attribute value"));
+        (* Replacement texts are always expanded with norm_crlf = false *)
+        expand_attvalue_with_rec_check
+          lexerset dtd rtext warner (n :: entities) false ^ expand tl'
+    | CRef(-1) :: tl' ->
+        (* NOTE(review): CRef(-1) is presumably the lexer's marker for a
+         * CRLF sequence -- confirm against the lexer. One character gives
+         * one space, two characters give two spaces.
+         *)
+        if norm_crlf then
+          " " ^ expand tl'
+        else
+          "  " ^ expand tl'
+    | CRef n :: tl' ->
+        character lexerset.lex_encoding warner n ^ expand tl'
+    | CharData "<" :: tl' ->
+        raise
+          (WF_error
+             ("Attribute value contains character '<' literally"))
+    | CharData x :: tl' ->
+        x ^ expand tl'
+    | _ -> assert false
+  in
+  expand toklist
+;;
+
+
+(* expand_attvalue lexerset dtd s warner norm_crlf:
+ * Expands the attribute value s, starting with no entity being expanded.
+ * norm_crlf says whether the sequence CRLF is recognized as one character
+ * or not (i.e. as two characters).
+ *)
+let expand_attvalue lexerset dtd s warner norm_crlf =
+  let entities_in_expansion = [] in
+  expand_attvalue_with_rec_check
+    lexerset dtd s warner entities_in_expansion norm_crlf
+;;
+
+
+(* count_lines s:
+ * Returns the pair (n, c) where n is the number of line separators in s
+ * and c is the number of characters following the last separator (the
+ * length of s if there is none). CR, LF and the pair CR LF each count as
+ * one separator.
+ *)
+let count_lines s =
+  let l = String.length s in
+
+  (* Position of the first occurrence of c at or after k, or -1 *)
+  let index_from k c =
+    try String.index_from s k c with Not_found -> (-1) in
+
+  (* n: separators seen so far; k: position where scanning continues;
+   * no_cr / no_lf: it is already known that no further CR / LF exists,
+   * so the corresponding search can be skipped.
+   *)
+  let rec count n k no_cr no_lf =
+    let next_cr = if no_cr then (-1) else index_from k '\013' in
+    let next_lf = if no_lf then (-1) else index_from k '\010' in
+    if next_cr >= 0 && (next_lf < 0 || next_cr < next_lf) then begin
+      (* A CR comes first; a directly following LF belongs to it *)
+      if next_cr+1 < l && s.[next_cr+1] = '\010' then
+        count (n+1) (next_cr+2) false (next_lf < 0)
+      else
+        count (n+1) (next_cr+1) false (next_lf < 0)
+    end
+    else if next_lf >= 0 then begin
+      count (n+1) (next_lf+1) (next_cr < 0) false
+    end
+    else
+      n, (l - k)
+
+  in
+  count 0 0 false false
+;;
+
+
+(* tokens_of_xml_pi lexers s:
+ * Scans s, extended by one space, with lexers.scan_xml_pi and returns all
+ * tokens up to, but not including, Pro_eof.
+ *)
+let tokens_of_xml_pi lexers s =
+  let lexbuf = Lexing.from_string (s ^ " ") in
+  let rec gather rev_tokens =
+    match lexers.scan_xml_pi lexbuf with
+      | Pro_eof -> List.rev rev_tokens
+      | tok -> gather (tok :: rev_tokens)
+  in
+  gather []
+;;
+
+
+(* decode_xml_pi pl:
+ * pl must be a sequence of the token triples Pro_name, Pro_eq, Pro_string
+ * (i.e. name="value" or name='value'); the (name, value) pairs are
+ * returned in order. The values are returned as they are; no substitution
+ * of &entities; happens.
+ * Raises WF_error if pl has any other form.
+ *)
+let decode_xml_pi pl =
+  let rec gather rev_pairs = function
+    | Pro_name name :: Pro_eq :: Pro_string value :: rest ->
+        gather ((name, value) :: rev_pairs) rest
+    | [] ->
+        List.rev rev_pairs
+    | _ ->
+        raise (WF_error("Bad XML processing instruction"))
+  in
+  gather [] pl
+;;
+
+
+(* decode_doc_xml_pi pl:
+ * Interprets the (name, value) pairs of an XML declaration. The list must
+ * begin with "version", optionally followed by "encoding" and then
+ * optionally by "standalone". Returns (version, encoding, standalone),
+ * the latter two as options.
+ * Raises WF_error for any other list.
+ *)
+let decode_doc_xml_pi pl =
+  let bad () = raise(WF_error("Bad XML declaration")) in
+  match pl with
+    | ("version", v) :: rest ->
+        begin match rest with
+          | [] -> (v, None, None)
+          | [ "encoding", e ] -> (v, Some e, None)
+          | [ "standalone", s ] -> (v, None, Some s)
+          | [ "encoding", e; "standalone", s ] -> (v, Some e, Some s)
+          | _ -> bad ()
+        end
+    | _ ->
+        bad ()
+;;
+
+
+(* check_text_xml_pi pl:
+ * Accepts the (name, value) pairs of a text declaration: "encoding",
+ * optionally preceded by "version".
+ * Raises WF_error for any other list.
+ *)
+let check_text_xml_pi pl =
+  match pl with
+    | [ "encoding", _ ]
+    | [ "version", _; "encoding", _ ] ->
+        ()
+    | _ ->
+        raise(WF_error("Bad XML declaration"))
+;;
+
+
+(* check_version_num s:
+ * Raises WF_error unless s consists only of ASCII letters, digits and the
+ * characters '-', '_', '.', ':'.
+ *)
+let check_version_num s =
+  String.iter
+    (function
+       | 'a'..'z' | 'A'..'Z' | '0'..'9'
+       | '-' | '_' | '.' | ':' -> ()
+       | _ ->
+           raise(WF_error("Bad XML version string")))
+    s
+;;
+
+
+(* check_public_id s:
+ * Raises WF_error unless every character of s is one of the characters
+ * enumerated below (space, CR, LF, ASCII letters, digits and a fixed set
+ * of punctuation characters).
+ *)
+let check_public_id s =
+  String.iter
+    (function
+       | ' ' | '\013' | '\010'
+       | 'a'..'z' | 'A'..'Z' | '0'..'9'
+       | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' | '?'
+       | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%' -> ()
+       | _ ->
+           raise(WF_error("Illegal character in PUBLIC identifier")))
+    s
+;;
+
+
+(**********************************************************************)
+(* list functions *)
+
+
+(* check_dups l:
+ * Returns true iff some element occurs more than once in l (elements are
+ * compared with List.mem).
+ *)
+let rec check_dups = function
+  | [] -> false
+  | head :: rest -> List.mem head rest || check_dups rest
+;;
+
+
+(* count pred l:
+ * Returns the number of elements of l that satisfy pred. pred is applied
+ * to the elements from left to right. The stack depth does not depend on
+ * the length of l.
+ *)
+let count pred l =
+  List.fold_left
+    (fun n x -> if pred x then n + 1 else n)
+    0
+    l
+;;
+
+
+(**********************************************************************)
+(* attributes *)
+
+let check_attribute_value_lexically lexerset x t v =
+ (* raises x if the attribute value v does not match the lexical rules
+ * for attribute type t. v is split into tokens with
+ * lexerset.scan_name_string (Ignore tokens are dropped), and then:
+ * - t = A_id, A_idref, A_entity, A_notation _:
+ *       v must consist of exactly one Name token
+ * - t = A_idrefs, A_entities:
+ *       v must consist of Name tokens only
+ * - t = A_nmtoken, A_enum _:
+ *       v must consist of exactly one Name or Nametoken token
+ * - t = A_nmtokens:
+ *       v must consist of Name and Nametoken tokens only
+ * - any other t (e.g. A_cdata): not checked
+ * x is a lazy value yielding the exception; it is forced only when the
+ * check fails.
+ *)
+ let lexbuf = Lexing.from_string v in
+ let rec get_name_list() =
+ match lexerset.scan_name_string lexbuf with
+ Eof -> []
+ | Ignore -> get_name_list()
+ | tok -> tok :: get_name_list()
+ in
+ let l = get_name_list() in
+ match t with
+ (A_id | A_idref | A_entity | A_notation _) ->
+ begin match l with
+ [ Name n ] -> ()
+ | _ -> raise (Lazy.force x)
+ end
+ | (A_idrefs | A_entities) ->
+ if List.exists (fun tok ->
+ match tok with
+ Name _ -> false
+ | _ -> true) l then
+ raise (Lazy.force x)
+ | (A_nmtoken | A_enum _) ->
+ begin match l with
+ [ Name n ] -> ()
+ | [ Nametoken n ] -> ()
+ | _ -> raise (Lazy.force x)
+ end
+ | A_nmtokens ->
+ if List.exists (fun tok ->
+ match tok with
+ Name _ -> false
+ | Nametoken _ -> false
+ | _ -> true
+ ) l then
+ raise (Lazy.force x)
+ | _ -> ()
+;;
+
+
+(* split_attribute_value lexerset v:
+ * Splits v into the list of the names and nmtokens it contains. The white
+ * space separating them (the Ignore tokens) is suppressed and not
+ * returned.
+ * Raises Validation_error if v contains any other token.
+ *)
+let split_attribute_value lexerset v =
+  let lexbuf = Lexing.from_string v in
+  let rec gather rev_words =
+    match lexerset.scan_name_string lexbuf with
+      | Eof -> List.rev rev_words
+      | Ignore -> gather rev_words
+      | Name word | Nametoken word -> gather (word :: rev_words)
+      | _ -> raise(Validation_error("Illegal attribute value"))
+  in
+  gather []
+;;
+
+
+(* normalize_line_separators lexerset s:
+ * Scans s with lexerset.scan_for_crlf and returns the concatenation of
+ * the CharData pieces delivered by the scanner. The pieces are collected
+ * in a buffer, so the cost is linear in the length of the result.
+ *)
+let normalize_line_separators lexerset s =
+  let lexbuf = Lexing.from_string s in
+  let out = Buffer.create (String.length s) in
+  let rec copy_pieces () =
+    match lexerset.scan_for_crlf lexbuf with
+      Eof -> ()
+    | CharData piece ->
+        Buffer.add_string out piece;
+        copy_pieces ()
+    | _ -> assert false
+  in
+  copy_pieces ();
+  Buffer.contents out
+;;
+
+
+let value_of_attribute lexerset dtd n atype v =
+ (* The attribute with name 'n', type 'atype' and string value 'v' is
+ * decomposed, and the att_value is returned:
+ * - It is checked whether 'v' conforms to the lexical rules for attributes
+ * of type 'atype'
+ * - If 'atype <> A_cdata', leading and trailing spaces are removed from 'v'.
+ * - If 'atype = A_notation d', it is checked if 'v' matches one of the
+ * notation names contained in d.
+ * - If 'atype = A_enum d', it is checked whether 'v' matches one of the
+ * tokens from d
+ * - If 'atype' refers to a "single-value" type, the value is returned as
+ * Value u, where u is the normalized value. If 'atype' refers to a
+ * "list" type, the value is returned as Valuelist l, where l contains
+ * the tokens.
+ * Violations found by these checks are reported by raising Validation_error.
+ * Note that this function does not implement all normalization rules.
+ * It is expected that the string passed as 'v' is already preprocessed;
+ * i.e. character and entity references are resolved, and the substitution
+ * of white space characters by space characters has already been performed.
+ * If these requirements are met, the value returned by this function
+ * will be perfectly normalized.
+ *
+ * Further checks:
+ * - ENTITY and ENTITIES values: It is checked whether there is an
+ * unparsed general entity of that name (see check_ndata_entity below)
+ * [ Other checks planned: ID, IDREF, IDREFS but not yet implemented ]
+ *)
+
+ let lexical_error() =
+ lazy (raise(Validation_error("Attribute `" ^ n ^ "' is lexically malformed"))) in
+
+ let remove_leading_and_trailing_spaces u =
+ (* Precondition: 'u' contains exactly one name or nmtoken *)
+ match split_attribute_value lexerset u with
+ [ u' ] -> u'
+ | _ -> assert false (* not exactly one token: precondition violated *)
+ in
+
+ let check_ndata_entity u =
+ let en, extdecl = dtd # gen_entity u in (* or Validation_error *)
+ if not (en # is_ndata) then
+ raise(Validation_error("Reference to entity `" ^ u ^
+ "': NDATA entity expected"));
+ if dtd # standalone_declaration && extdecl then
+ raise(Validation_error("Reference to entity `" ^ u ^
+ "' violates standalone declaration"));
+ in
+
+ match atype with
+ A_cdata ->
+ Value v
+
+ | (A_id | A_idref | A_nmtoken) ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ Value (remove_leading_and_trailing_spaces v)
+ | A_entity ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let v' = remove_leading_and_trailing_spaces v in
+ check_ndata_entity v';
+ Value v'
+
+ | (A_idrefs | A_nmtokens) ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ Valuelist (split_attribute_value lexerset v)
+
+ | A_entities ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let l = split_attribute_value lexerset v in
+ List.iter check_ndata_entity l;
+ Valuelist l
+
+ | A_notation nl ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let v' = remove_leading_and_trailing_spaces v in
+ if not (List.mem v' nl) then
+ raise(Validation_error
+ ("Attribute `" ^ n ^
+ "' does not match one of the declared notation names"));
+ Value v'
+
+ | A_enum enuml ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let v' = remove_leading_and_trailing_spaces v in
+ if not (List.mem v' enuml) then
+ raise(Validation_error
+ ("Attribute `" ^ n ^
+ "' does not match one of the declared enumerator tokens"));
+ Value v'
+;;
+
+
+let normalization_changes_value lexerset atype v =
+  (* Tells whether normalizing the attribute value 'v' according to its
+   * declared type 'atype' would produce a string different from 'v':
+   *
+   * - A_cdata: never.
+   * - Single-token types (A_id, A_idref, A_entity, A_nmtoken, A_notation,
+   *   A_enum): iff 'v' is non-empty and begins or ends with a space.
+   * - List types (A_idrefs, A_entities, A_nmtokens): iff 'v' differs from
+   *   its tokens joined by exactly one space character each.
+   *
+   * Precondition (not checked here): TABs, CRs, and LFs in 'v' have already
+   * been converted to spaces.
+   *
+   * May raise Validation_error via split_attribute_value if a list value
+   * cannot be tokenized.
+   *)
+
+  let is_space c = (c = ' ') in
+
+  match atype with
+      A_cdata ->
+        false
+
+    | (A_id | A_idref | A_entity | A_nmtoken | A_notation _ | A_enum _) ->
+        (* Looking at single bytes is fine for ISO-8859-1 and UTF-8 alike. *)
+        let len = String.length v in
+        len > 0 && (is_space v.[0] || is_space v.[len - 1])
+
+    | (A_idrefs | A_entities | A_nmtokens) ->
+        (* Rebuild the normal form from the tokens and compare with 'v'. *)
+        let tokens = split_attribute_value lexerset v in
+        String.concat " " tokens <> v
+;;
+
+
+(**********************************************************************)
+
+let write_markup_string ~(from_enc:rep_encoding) ~to_enc os s =
+  (* Emits the markup string 's' on 'os' without any escaping. 's' is taken
+   * to be encoded in 'from_enc'; when 'to_enc' differs it is recoded first.
+   * The 'subst' callback handed to recode_string raises Failure, naming the
+   * code point that cannot be represented.
+   *)
+  let src_enc = (from_enc :> encoding) in
+  let unrepresentable n =
+    failwith ("Pxp_aux.write_markup_string: Cannot represent " ^
+              "code point " ^ string_of_int n) in
+  let recoded =
+    if to_enc = src_enc then
+      s
+    else
+      recode_string ~in_enc:src_enc ~out_enc:to_enc ~subst:unrepresentable s
+  in
+  write os recoded 0 (String.length recoded)
+;;
+
+
+let write_data_string ~(from_enc:rep_encoding) ~to_enc os content =
+  (* Write the 'from_enc'-encoded string 'content' as 'to_enc'-encoded string
+   * to 'os'. The characters '&', '<', '>', '"', '%' are written as the
+   * references "&amp;", "&lt;", "&gt;", "&quot;", "&#37;"; every character
+   * that cannot be represented in 'to_enc' is written as "&#<code point>;".
+   *)
+  let convert_ascii s =
+    (* Convert the ASCII-encoded string 's'. Note that 'from_enc' is
+     * always ASCII-compatible
+     *)
+    if to_enc = (from_enc :> encoding)
+    then s
+    else
+      recode_string
+        ~in_enc:(from_enc :> encoding)
+        ~out_enc:to_enc
+        ~subst:(fun n -> assert false)
+        s
+  in
+
+  let write_ascii s =
+    (* Write the ASCII-encoded string 's' *)
+    let s' = convert_ascii s in
+    write os s' 0 (String.length s')
+  in
+
+  let write_part j l =
+    (* Writes the substring of 'content' beginning at pos 'j' with length 'l';
+     * characters 'to_enc' cannot represent become character references *)
+    if to_enc = (from_enc :> encoding) then
+      write os content j l
+    else begin
+      let s' = recode_string
+                 ~in_enc:(from_enc :> encoding)
+                 ~out_enc:to_enc
+                 ~subst:(fun n ->
+                           convert_ascii ("&#" ^ string_of_int n ^ ";"))
+                 (String.sub content j l)
+      in
+      write os s' 0 (String.length s')
+    end
+  in
+
+  let i = ref 0 in   (* start of the run of plain characters not yet written *)
+  for k = 0 to String.length content - 1 do
+    match content.[k] with
+        ('&' | '<' | '>' | '"' | '%') as c ->
+          if !i < k then
+            write_part !i (k - !i);
+          begin match c with
+              '&' -> write_ascii "&amp;"
+            | '<' -> write_ascii "&lt;"
+            | '>' -> write_ascii "&gt;"
+            | '"' -> write_ascii "&quot;"
+            | '%' -> write_ascii "&#37;" (* reserved in DTDs *)
+            | _ -> assert false
+          end;
+          i := k+1
+      | _ -> ()
+  done;
+  if !i < String.length content then
+    write_part !i (String.length content - !i)
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.5 2000/07/25 00:30:01 gerd
+ * Added support for pxp:dtd PI options.
+ *
+ * Revision 1.4 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.3 2000/07/16 16:33:57 gerd
+ * New function write_markup_string: Handles the encoding
+ * of the string.
+ *
+ * Revision 1.2 2000/07/08 22:15:45 gerd
+ * [Merging 0.2.10:] write_data_string: The character '%' is special, too.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_aux.ml:
+ *
+ * Revision 1.12 2000/05/27 19:08:30 gerd
+ * Added functionality to check standalone declaration:
+ *
+ * expand_attvalue: Checks whether included entities violate the
+ * stand-alone declaration.
+ *
+ * value_of_attribute: Checks whether ENTITY/ENTITIES values violate
+ * this declaration. (Furthermore, it is checked whether the NDATA
+ * entity exists - this has been forgotten in previous versions.)
+ *
+ * value_of_attribute/check_attribute_value_lexically: improved.
+ *
+ * New function normalization_changes_value: helps detecting
+ * one case which violates the standalone declaration.
+ *
+ * Revision 1.11 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.10 2000/05/01 20:41:56 gerd
+ * New function write_data_string.
+ *
+ * Revision 1.9 2000/04/30 18:11:31 gerd
+ * New function normalize_line_separators.
+ * In function expand_attvalue: New argument norm_crlf. If the attvalue
+ * is read directly from a file, the sequence CR LF must be converted to a
+ * single space. If the attvalue is read from a replacement text, CR LF has
+ * already converted to a single LF, and CR LF, if still occurring, must be
+ * converted to two spaces. The caller can indicate the case by passing
+ * true/false as norm_crlf.
+ *
+ * Revision 1.8 1999/09/01 22:51:07 gerd
+ * Added functions.
+ * 'character' raises Illegal_character if characters are found that
+ * do not match the production Char.
+ *
+ * Revision 1.7 1999/09/01 16:17:37 gerd
+ * Added function 'check_name'.
+ *
+ * Revision 1.6 1999/08/15 20:33:19 gerd
+ * Added: a function that checks public identifiers. Only certain
+ * characters may occur in these identifiers.
+ * Control characters are rejected by the "character" function.
+ * Bugfix: recursive entity references are detected in attribute
+ * expansion
+ *
+ * Revision 1.5 1999/08/15 02:18:02 gerd
+ * That '<' is not allowed in attribute values, is a violation
+ * of well-formedness, not of the validity; so WF_error is raised.
+ *
+ * Revision 1.4 1999/08/15 00:20:37 gerd
+ * When expanding attribute values, references to parameter
+ * entities are now resolved by the method "replacement_text" which
+ * has an additional return value, and no longer by "attlist_replacement_text".
+ * The new return value indicates whether references to external entities
+ * have been resolved (directly or indirectly); this is allowed at some
+ * locations but not in attribute values.
+ *
+ * Revision 1.3 1999/08/14 22:05:53 gerd
+ * Several functions have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *
+ * Revision 1.2 1999/08/10 21:35:06 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_codewriter.ml b/helm/DEVEL/pxp/pxp/pxp_codewriter.ml
new file mode 100644
index 000000000..a6ab0db41
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_codewriter.ml
@@ -0,0 +1,518 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_document
+open Pxp_yacc
+open Pxp_dtd
+open Pxp_types
+
+let write_expr_ext_id out extid =
+  (* Prints to 'out' an O'Caml expression that rebuilds the external
+   * identifier 'extid'; its strings are escaped with String.escaped. *)
+  let quoted s = "\"" ^ String.escaped s ^ "\"" in
+  output_string out
+    (match extid with
+         System sysid -> "(Pxp_types.System" ^ quoted sysid ^ ")"
+       | Public(pubid,sysid) ->
+           "(Pxp_types.Public(" ^ quoted pubid ^ "," ^ quoted sysid ^ "))"
+       | Anonymous -> "Pxp_types.Anonymous")
+;;
+
+
+let write_expr_items out write_item items =
+  (* Helper: prints every element of 'items' with 'write_item out', each one
+   * followed by "; " (so the last element gets a trailing separator, too).
+   *)
+  List.iter (fun item -> write_item out item; output_string out "; ") items
+;;
+
+
+let rec write_expr_content_model out cm =
+  (* Prints to 'out' an O'Caml expression that rebuilds the content model
+   * 'cm' from the constructors of Pxp_types. write_expr_mixed_spec and
+   * write_expr_regexp_spec do the same for the parts of Mixed and Regexp
+   * models.
+   *)
+  let emit = output_string out in
+  match cm with
+      Unspecified -> emit "Pxp_types.Unspecified"
+    | Empty -> emit "Pxp_types.Empty"
+    | Any -> emit "Pxp_types.Any"
+    | Mixed specs ->
+        emit "(Pxp_types.Mixed [";
+        write_expr_items out write_expr_mixed_spec specs;
+        emit "])"
+    | Regexp re ->
+        emit "(Pxp_types.Regexp ";
+        write_expr_regexp_spec out re;
+        emit ")"
+
+and write_expr_mixed_spec out ms =
+  (* Prints the expression for one member of a Mixed content model. *)
+  match ms with
+      MPCDATA -> output_string out "Pxp_types.MPCDATA"
+    | MChild name ->
+        output_string out ("(Pxp_types.MChild \"" ^ String.escaped name ^ "\")")
+
+and write_expr_regexp_spec out re =
+  (* Prints the expression for the regular expression 're', recursing into
+   * its sub-expressions.
+   *)
+  let emit = output_string out in
+  let unary opening sub =
+    emit opening; write_expr_regexp_spec out sub; emit ")" in
+  let nary opening subs =
+    emit opening; write_expr_items out write_expr_regexp_spec subs; emit "])" in
+  match re with
+      Optional sub -> unary "(Pxp_types.Optional " sub
+    | Repeated sub -> unary "(Pxp_types.Repeated " sub
+    | Repeated1 sub -> unary "(Pxp_types.Repeated1 " sub
+    | Alt subs -> nary "(Pxp_types.Alt [" subs
+    | Seq subs -> nary "(Pxp_types.Seq [" subs
+    | Child name ->
+        output_string out ("(Pxp_types.Child \"" ^ String.escaped name ^ "\")")
+;;
+
+
+let write_expr_att_type out at =
+  (* Prints to 'out' an O'Caml expression that rebuilds the attribute type
+   * 'at'. The name lists of A_notation and A_enum are printed as list
+   * literals whose elements are escaped with String.escaped.
+   *)
+  let emit = output_string out in
+  let emit_names opening names =
+    (* the opening text, then every name as a quoted literal plus "; " *)
+    emit opening;
+    List.iter (fun name -> emit ("\"" ^ String.escaped name ^ "\"; ")) names;
+    emit "])"
+  in
+
+  match at with
+      A_cdata -> emit "Pxp_types.A_cdata"
+    | A_id -> emit "Pxp_types.A_id"
+    | A_idref -> emit "Pxp_types.A_idref"
+    | A_idrefs -> emit "Pxp_types.A_idrefs"
+    | A_entity -> emit "Pxp_types.A_entity"
+    | A_entities -> emit "Pxp_types.A_entities"
+    | A_nmtoken -> emit "Pxp_types.A_nmtoken"
+    | A_nmtokens -> emit "Pxp_types.A_nmtokens"
+    | A_notation names -> emit_names "(Pxp_types.A_notation [" names
+    | A_enum tokens -> emit_names "(Pxp_types.A_enum [" tokens
+;;
+
+
+let write_expr_att_default out ad =
+  (* Prints to 'out' an O'Caml expression rebuilding the default 'ad'. *)
+  let applied opening s = opening ^ String.escaped s ^ "\")" in
+  output_string out (match ad with
+      D_required -> "Pxp_types.D_required"
+    | D_implied -> "Pxp_types.D_implied"
+    | D_default s -> applied "(Pxp_types.D_default \"" s
+    | D_fixed s -> applied "(Pxp_types.D_fixed \"" s)
+;;
+
+
+let write_expr_att_value out av =
+  (* Prints to 'out' an O'Caml expression that rebuilds the attribute value
+   * 'av'; every string is escaped with String.escaped.
+   *)
+  let quoted s = "\"" ^ String.escaped s ^ "\"" in
+  match av with
+      Implied_value -> output_string out "Pxp_types.Implied_value"
+    | Value s -> output_string out ("(Pxp_types.Value " ^ quoted s ^ ")")
+    | Valuelist tokens ->
+        output_string out "(Pxp_types.Valuelist [";
+        (* each token is followed by "; ", including the last one *)
+        List.iter (fun tok -> output_string out (quoted tok ^ "; ")) tokens;
+        output_string out "])"
+;;
+
+
+let ocaml_encoding = function
+  (* Yields the O'Caml source spelling of the given encoding tag. *)
+    `Enc_utf8 -> "`Enc_utf8"
+  | `Enc_utf16 -> "`Enc_utf16"
+  | `Enc_utf16_le -> "`Enc_utf16_le"
+  | `Enc_utf16_be -> "`Enc_utf16_be"
+  | `Enc_iso88591 -> "`Enc_iso88591"
+;;
+
+
+let write_expr_new_pi out pi =
+  (* Prints an expression constructing a Pxp_dtd.proc_instruction like 'pi'. *)
+  Printf.fprintf out "(new Pxp_dtd.proc_instruction \"%s\" \"%s\" %s)"
+    (String.escaped (pi # target)) (String.escaped (pi # value))
+    (ocaml_encoding (pi # encoding))
+;;
+
+
+let write_expr_node_type out nt =
+  (* Prints to 'out' an O'Caml expression rebuilding the node type 'nt'. *)
+  let applied opening s = opening ^ String.escaped s ^ "\")" in
+  output_string out (match nt with
+      T_data -> "Pxp_document.T_data"
+    | T_element name -> applied "(Pxp_document.T_element \"" name
+    | T_super_root -> "Pxp_document.T_super_root"
+    | T_pinstr target -> applied "(Pxp_document.T_pinstr \"" target
+    | T_comment -> "Pxp_document.T_comment"
+    | _ -> assert false)
+;;
+
+
+let write_local_dtd out (dtd : dtd) =
+  (* Outputs "let mkdtd warner = ... in" to 'out'. The generated function
+   * builds a new Pxp_dtd.dtd and replays the properties of 'dtd' on it. *)
+  output_string out "let mkdtd warner =\n";
+  output_string out ("let encoding = " ^ ocaml_encoding (dtd # encoding) ^ " in\n");
+  output_string out "let dtdobj = new Pxp_dtd.dtd warner encoding in\n";
+
+  (* Set the ID. The "dtdobj # set_id" call is emitted per case, so that a
+   * DTD without ID produces no (dangling) call at all: *)
+  begin match dtd # id with
+      None -> ()
+    | Some(External x) ->
+        output_string out "dtdobj # set_id (Pxp_types.External ";
+        write_expr_ext_id out x;
+        output_string out ");\n"
+    | Some(Derived x) ->
+        output_string out "dtdobj # set_id (Pxp_types.Derived ";
+        write_expr_ext_id out x;
+        output_string out ");\n"
+    | Some Internal ->
+        output_string out "dtdobj # set_id Pxp_types.Internal;\n";
+  end;
+
+  (* Set standalone declaration: *)
+  output_string out ("dtdobj # set_standalone_declaration " ^
+                     string_of_bool (dtd # standalone_declaration) ^ ";\n");
+
+  (* Add notations (in sorted order of their names): *)
+  List.iter
+    (fun noname ->
+       let no = dtd # notation noname in
+       output_string out ("let no = new Pxp_dtd.dtd_notation \"" ^
+                          String.escaped noname ^ "\" ");
+       write_expr_ext_id out (no # ext_id);
+       output_string out " encoding in\n";
+       output_string out "dtdobj # add_notation no;\n";
+    )
+    (List.sort Pervasives.compare (dtd # notation_names));
+
+  (* Add unparsed entities (general entities that are not NDATA are skipped): *)
+  List.iter
+    (fun enname ->
+       let en, _ = dtd # gen_entity enname in
+       if en # is_ndata then begin
+         let ext_id = en # ext_id in
+         let notation = en # notation in
+         let encoding = en # encoding in
+         output_string out ("let ndata = new Pxp_entity.ndata_entity \"" ^
+                            String.escaped enname ^ "\" ");
+         write_expr_ext_id out ext_id;
+         output_string out ("\"" ^ String.escaped notation ^ "\" " ^
+                            ocaml_encoding encoding ^ " in \n");
+         output_string out "dtdobj # add_gen_entity (ndata :> Pxp_entity.entity) false;\n";
+       end;
+    )
+    (List.sort Pervasives.compare (dtd # gen_entity_names));
+
+
+  (* Add elements: *)
+  List.iter
+    (fun elname ->
+       (* Create the element 'el': *)
+       let el = dtd # element elname in
+       output_string out ("let el = new Pxp_dtd.dtd_element dtdobj \"" ^
+                          String.escaped elname ^ "\" in\n");
+       output_string out "let cm = ";
+       write_expr_content_model out (el # content_model);
+       output_string out " in\n";
+       output_string out "el # set_cm_and_extdecl cm false;\n";
+       (* Add attributes (the last argument is always written as 'false'): *)
+       List.iter
+         (fun attname ->
+            let atttype, attdefault = el # attribute attname in
+            output_string out ("el # add_attribute \"" ^
+                               String.escaped attname ^ "\" ");
+            write_expr_att_type out atttype;
+            output_string out " ";
+            write_expr_att_default out attdefault;
+            output_string out " false;\n";
+         )
+         (List.sort Pervasives.compare (el # attribute_names));
+
+       (* Allow arbitrary? *)
+       if el # arbitrary_allowed then
+         output_string out "el # allow_arbitrary;\n"
+       else
+         output_string out "el # disallow_arbitrary;\n";
+
+       (* Validate: *)
+       output_string out "el # validate;\n";
+
+       (* Add the element 'el' to 'dtdobj': *)
+       output_string out "dtdobj # add_element el;\n";
+    )
+    (List.sort Pervasives.compare (dtd # element_names));
+
+  (* Add processing instructions: *)
+  List.iter
+    (fun target ->
+       let pilist = dtd # pinstr target in
+       List.iter
+         (fun pi ->
+            output_string out "let pi = ";
+            write_expr_new_pi out pi;
+            output_string out " in\n";
+            output_string out "dtdobj # add_pinstr pi;\n";
+         )
+         pilist;
+    )
+    (List.sort Pervasives.compare (dtd # pinstr_names));
+
+  (* Set the name of the root element: *)
+  begin match dtd # root with
+      None -> ()
+    | Some rootname ->
+        output_string out ("dtdobj # set_root \"" ^
+                           String.escaped rootname ^ "\";\n")
+  end;
+
+  (* Special options: *)
+  if dtd # arbitrary_allowed then
+    output_string out "dtdobj # allow_arbitrary;\n"
+  else
+    output_string out "dtdobj # disallow_arbitrary;\n";
+
+  (* Return dtdobj: *)
+  output_string out "dtdobj in\n"
+;;
+
+
+let rec write_local_subtree out n =
+  (* Prints to 'out' an O'Caml term that rebuilds the subtree rooted at the
+   * node 'n'. The term has the shape
+   *
+   *   let nt = <node type> in
+   *   let t = <node creation> in
+   *   <add_pinstr calls> <add_node calls> local_validate t; t
+   *
+   * The printed text refers to the names spec, dtd, add_node, add_pinstr
+   * and local_validate, which the surrounding generated code must bind.
+   * Node types other than T_data, T_element, T_super_root, T_pinstr and
+   * T_comment are not supported (assert false).
+   *)
+  let emit = output_string out in
+  let quoted s = "\"" ^ String.escaped s ^ "\"" in
+
+  let emit_position () =
+    (* Prints the binding of 'pos' to the (string, int, int) triple that
+     * 'n # position' returns.
+     *)
+    let loc, line, col = n # position in
+    emit ("let pos = " ^ quoted loc ^ ", " ^
+          string_of_int line ^ ", " ^
+          string_of_int col ^ " in\n")
+  in
+
+  let emit_attribute name text =
+    (* Prints one entry of the attribute list: the name and the string value,
+     * both as quoted literals.
+     *)
+    emit (quoted name ^ ", ");
+    emit (quoted text ^ "; ")
+  in
+
+  (* 1. The node type; the generated code binds it to 'nt': *)
+  emit "let nt = ";
+  write_expr_node_type out (n # node_type);
+  emit " in\n";
+
+  (* 2. The node itself; the generated code binds it to 't': *)
+  begin match n # node_type with
+      T_data ->
+        emit ("let t = Pxp_document.create_data_node spec dtd " ^
+              quoted (n # data) ^ " in\n")
+    | T_element elname ->
+        emit_position ();
+        emit ("let t = Pxp_document.create_element_node ~position:pos spec dtd " ^
+              quoted elname ^ " [ ");
+        List.iter
+          (fun (name, value) ->
+             match value with
+                 Value s -> emit_attribute name s
+               | Valuelist sl ->
+                   (* list values are written as one space-separated string *)
+                   emit_attribute name (String.concat " " sl)
+               | Implied_value -> ()
+          )
+          (n # attributes);
+        emit " ] in\n"
+    | T_super_root ->
+        emit_position ();
+        emit "let t = Pxp_document.create_super_root_node ~position:pos spec dtd in\n"
+    | T_pinstr piname ->
+        emit_position ();
+        emit "let pi = ";
+        write_expr_new_pi out (List.hd (n # pinstr piname));
+        emit " in\n";
+        emit "let t = Pxp_document.create_pinstr_node ~position:pos spec dtd pi in\n"
+    | T_comment ->
+        emit_position ();
+        emit "let comment = ";
+        begin match n # comment with
+            None -> assert false
+          | Some c -> emit (quoted c)
+        end;
+        emit " in\n";
+        emit "let t = Pxp_document.create_comment_node ~position:pos spec dtd comment in\n"
+    | _ -> assert false
+  end;
+
+  (* 3. The processing instructions of the node (skipped for T_pinstr
+   *    nodes), in sorted order of their targets:
+   *)
+  begin match n # node_type with
+      T_pinstr _ -> ()
+    | _ ->
+        let emit_pinstr pi =
+          emit "let pi = ";
+          write_expr_new_pi out pi;
+          emit " in\n";
+          emit "add_pinstr t pi;\n" in
+        List.iter
+          (fun target -> List.iter emit_pinstr (n # pinstr target))
+          (List.sort Pervasives.compare (n # pinstr_names))
+  end;
+
+  (* 4. The sub nodes, each printed as a parenthesized argument of add_node: *)
+  n # iter_nodes
+    (fun sub ->
+       emit "add_node t (\n";
+       write_local_subtree out sub;
+       emit ");\n"
+    );
+
+  (* 5. Validate the node and return it: *)
+  emit "local_validate t;\n";
+  emit "t\n"
+;;
+
+
+let write_local_document out (d : 'ext document) =
+  (* Prints "let mkdoc warner spec = ... in" to 'out'. The generated mkdoc
+   * creates a document object, sets its XML version, rebuilds the DTD (via
+   * the mkdtd definition printed by write_local_dtd) and the node tree (via
+   * write_local_subtree), and re-adds the document-level processing
+   * instructions.
+   *)
+  let emit = output_string out in
+  emit "let mkdoc warner spec =\n";
+  emit "let doc = new Pxp_document.document warner in\n";
+  emit ("doc # init_xml_version \"" ^ String.escaped (d # xml_version) ^ "\";\n");
+
+  (* The DTD definition comes first; the code printed for the nodes uses 'dtd': *)
+  write_local_dtd out (d # dtd);
+  emit "let dtd = mkdtd warner in\n";
+  emit "let root = ";
+  write_local_subtree out (d # root);
+  emit " in\n";
+  emit "doc # init_root root;\n";
+
+  (* Document-level processing instructions, in sorted order of their targets: *)
+  let emit_pinstr pi =
+    emit "let pi = ";
+    write_expr_new_pi out pi;
+    emit " in\n";
+    emit "doc # add_pinstr pi;\n" in
+  List.iter
+    (fun target -> List.iter emit_pinstr (d # pinstr target))
+    (List.sort Pervasives.compare (d # pinstr_names));
+
+  emit "doc in\n"
+;;
+
+
+let write_helpers out = List.iter (output_string out)  (* local helper definitions *)
+  [ "let add_node t n = (t : 'ext Pxp_document.node) # add_node (n : 'ext Pxp_document.node) in\n";
+    "let add_pinstr t pi = (t : 'ext Pxp_document.node) # add_pinstr (pi : Pxp_dtd.proc_instruction) in\n";
+    "let local_validate t = (t : 'ext Pxp_document.node) # local_validate ()in\n" ]
+;;
+
+
+let write_document out d =
+  (* Wraps the helper and mkdoc definitions into "let create_document ...;;" *)
+  let header = "let create_document warner spec =\n" in
+  output_string out header; write_helpers out; write_local_document out d;
+  output_string out "mkdoc warner spec;;\n"
+;;
+
+
+let write_dtd out dtd =
+  (* Wraps the mkdtd definition into "let create_dtd warner = ...;;" *)
+  output_string out "let create_dtd warner =\n"; write_local_dtd out dtd;
+  output_string out "mkdtd warner;;\n"
+;;
+
+
+let write_subtree out t =
+  (* Emits "let create_subtree dtd spec = <helpers> <subtree term>;;". The
+   * term printed by write_local_subtree already evaluates to the new node. *)
+  output_string out "let create_subtree dtd spec =\n"; write_helpers out;
+  write_local_subtree out t; output_string out ";;\n"
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/30 15:48:07 gerd
+ * Minor update.
+ *
+ * Revision 1.6 2000/08/18 20:16:59 gerd
+ * Updates because of new node types T_comment, T_pinstr, T_super_root.
+ *
+ * Revision 1.5 2000/07/23 02:16:51 gerd
+ * Changed signature of local_validate.
+ *
+ * Revision 1.4 2000/07/09 17:59:35 gerd
+ * Updated: The position of element nodes is also written.
+ *
+ * Revision 1.3 2000/07/09 00:30:00 gerd
+ * Notations are written before they are used.
+ * Unparsed entities are included.
+ * Further changes.
+ *
+ * Revision 1.2 2000/07/08 22:59:14 gerd
+ * [Merging 0.2.10:] Improved: The resulting code can be compiled
+ * faster, and the compiler is less hungry on memory.
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_codewriter.ml:
+ *
+ * Revision 1.1 2000/03/11 22:57:28 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_codewriter.mli b/helm/DEVEL/pxp/pxp/pxp_codewriter.mli
new file mode 100644
index 000000000..e04bd8a98
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_codewriter.mli
@@ -0,0 +1,94 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_document
+open Pxp_yacc
+open Pxp_dtd
+
+val write_document : out_channel -> 'ext document -> unit
+ (* Writes O'Caml code to the out_channel that is a top-level function
+ * creating a fresh document which is equal to the passed document:
+ *
+ * "let create_document warner spec = ...;;"
+ *
+ * If you compile the code and call "create_document warner spec" the
+ * function creates a document tree which is (almost) equal to the
+ * passed document.
+ *
+ * The following properties may not be equal:
+ * - Parsed entities (only unparsed NDATA entities are written)
+ * - Whether a declaration occurs in an external entity or not
+ *
+ * 'warner': a collect_warnings object
+ * 'spec': a Pxp_document.spec
+ *)
+
+
+val write_dtd : out_channel -> dtd -> unit
+ (* Writes O'Caml code to the out_channel that is a top-level function
+ * creating a fresh DTD which is equal to the passed DTD:
+ *
+ * "let create_dtd warner = ...;;"
+ *
+ * If you compile the code and call "create_dtd warner" the
+ * function creates a DTD object which is (almost) equal to the
+ * passed object.
+ *
+ * The following properties may not be equal:
+ * - Parsed entities (only unparsed NDATA entities are written)
+ * - Whether a declaration occurs in an external entity or not
+ *
+ * 'warner': a collect_warnings object
+ *)
+
+val write_subtree : out_channel -> 'ext node -> unit
+ (* Writes O'Caml code to the out_channel that is a top-level function
+ * creating a fresh node tree which is equal to the passed tree:
+ *
+ * "let create_subtree dtd spec = ...;;"
+ *
+ * If you compile the code and call "create_subtree dtd spec" the
+ * function creates a node tree which is equal to the passed tree.
+ *
+ * 'dtd': a DTD object
+ * 'spec': a Pxp_document.spec
+ *)
+
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/07/09 00:30:14 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_codewriter.mli:
+ *
+ * Revision 1.1 2000/03/11 22:57:28 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dfa.ml b/helm/DEVEL/pxp/pxp/pxp_dfa.ml
new file mode 100644
index 000000000..b7baeb4d6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_dfa.ml
@@ -0,0 +1,271 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module StringOrd = struct
+ type t = string
+ let compare = (compare : string -> string -> int)
+end;;
+
+module StringMap = Map.Make(StringOrd);;
+ (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
+
+module Graph = struct
+ type vertex =
+ { mutable edges_out : (string * vertex) list;
+ mutable edges_out_map : vertex StringMap.t;
+ mutable edges_in : (vertex * string) list;
+ mutable graph : graph;
+ mutable id : int;
+ }
+ and graph =
+ { mutable vertexes : vertex list;
+ mutable mid : int; (* maximum id + 1 *)
+ }
+
+ exception Edge_not_unique
+
+ let create () =
+ { vertexes = [];
+ mid = 0;
+ }
+
+ let new_vertex g =
+ let v =
+ { edges_out = [];
+ edges_out_map = StringMap.empty;
+ edges_in = [];
+ graph = g;
+ id = g.mid;
+ } in
+ g.vertexes <- v :: g.vertexes;
+ g.mid <- g.mid + 1;
+ v
+
+ let new_edge v_from e v_to =
+ if v_from.graph != v_to.graph then
+ invalid_arg "Pxp_dfa.Graph.new_edge";
+ try
+ let v = StringMap.find e v_from.edges_out_map in
+ if v != v_to then
+ raise Edge_not_unique;
+ with
+ Not_found ->
+ v_from.edges_out <- (e, v_to) :: v_from.edges_out;
+ v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
+ v_to.edges_in <- (v_from, e) :: v_to.edges_in;
+ ()
+
+ let graph_of_vertex v = v.graph
+
+ let union g1 g2 =
+ List.iter
+ (fun v ->
+ v.graph <- g1;
+ v.id <- v.id + g1.mid;
+ )
+ g2.vertexes;
+ g1.vertexes <- g2.vertexes @ g1.vertexes;
+ g1.mid <- g1.mid + g2.mid;
+ g2.vertexes <- [];
+ g2.mid <- 0
+
+ let outgoing_edges v =
+ v.edges_out
+
+ let ingoing_edges v =
+ v.edges_in
+
+ let follow_edge v e =
+ StringMap.find e v.edges_out_map (* or raise Not_found *)
+end
+;;
+
+
+module VertexOrd = struct
+  (* Orders the vertexes of one graph by their ids; comparing vertexes
+   * of different graphs is rejected with Invalid_argument. *)
+  type t = Graph.vertex
+  let compare a b =
+    if a.Graph.graph == b.Graph.graph then compare a.Graph.id b.Graph.id
+    else invalid_arg "Pxp_dfa.VertexOrd.compare"
+end;;
+
+module VertexSet = Set.Make(VertexOrd);;
+
+
+type dfa_definition =
+ { dfa_graph : Graph.graph;
+ dfa_start : Graph.vertex;
+ dfa_stops : VertexSet.t;
+ dfa_null : bool;
+ }
+;;
+
+(**********************************************************************)
+
+(* Now that we have all the auxiliary data types, it is time for the
+ * algorithm that transforms regexps to DFAs.
+ *)
+
+open Pxp_types
+
+let dfa_of_regexp_content_model re =
+ let rec get_dfa re =
+ match re with
+ Child e ->
+ let g = Graph.create() in
+ let v1 = Graph.new_vertex g in
+ let v2 = Graph.new_vertex g in
+ Graph.new_edge v1 e v2;
+ { dfa_graph = g;
+ dfa_start = v1;
+ dfa_stops = VertexSet.singleton v2;
+ dfa_null = false;
+ }
+
+ | Seq [] ->
+ invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+ | Seq [re'] ->
+ get_dfa re'
+ | Seq (re1 :: seq2) ->
+ let dfa1 = get_dfa re1 in
+ let dfa2 = get_dfa (Seq seq2) in
+ (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
+ Graph.union dfa1.dfa_graph dfa2.dfa_graph;
+ (* Concatenation I: Add additional edges to the graph such
+ * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
+ * empty, w1w2 will match the merged DFAs.
+ *)
+ List.iter
+ (fun (e,v') ->
+ VertexSet.iter
+ (fun v ->
+ Graph.new_edge v e v')
+ dfa1.dfa_stops
+ )
+ (Graph.outgoing_edges dfa2.dfa_start);
+ (* Concatenation II: If the empty string matches dfa2, the stop
+ * nodes of dfa1 remain stop nodes.
+ *)
+ let stops =
+ if dfa2.dfa_null then
+ VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
+ else
+ dfa2.dfa_stops
+ in
+ (* The resulting DFA: *)
+ { dfa_graph = dfa1.dfa_graph;
+ dfa_start = dfa1.dfa_start;
+ dfa_stops = stops;
+ dfa_null = dfa1.dfa_null && dfa2.dfa_null;
+ }
+
+ | Alt [] ->
+ invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+ | Alt [re'] ->
+ get_dfa re'
+ | Alt alt ->
+ let dfa_alt = List.map get_dfa alt in
+ (* Merge the graphs. The result is in g: *)
+ let g = (List.hd dfa_alt).dfa_graph in
+ List.iter
+ (fun dfa ->
+ Graph.union g dfa.dfa_graph
+ )
+ (List.tl dfa_alt);
+ (* Get the new start node: *)
+ let start = Graph.new_vertex g in
+ (* Add the new edges starting at 'start': *)
+ List.iter
+ (fun dfa ->
+ List.iter
+ (fun (e, v) ->
+ Graph.new_edge start e v)
+ (Graph.outgoing_edges dfa.dfa_start)
+ )
+ dfa_alt;
+ (* If one of the old start nodes was a stop node, the new start
+ * node will be a stop node, too.
+ *)
+ let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
+ let stops =
+ List.fold_left
+ (fun s dfa -> VertexSet.union s dfa.dfa_stops)
+ VertexSet.empty
+ dfa_alt in
+ let stops' =
+ if null then
+ VertexSet.union stops (VertexSet.singleton start)
+ else
+ stops in
+ (* The resulting DFA: *)
+ { dfa_graph = g;
+ dfa_start = start;
+ dfa_stops = stops';
+ dfa_null = null;
+ }
+
+ | Optional re' ->
+ let dfa' = get_dfa re' in
+ if dfa'.dfa_null then
+ (* simple case *)
+ dfa'
+ else begin
+ (* Optimization possible: case ingoing_edges dfa_start = [] *)
+ let start = Graph.new_vertex dfa'.dfa_graph in
+ List.iter
+ (fun (e, v) ->
+ Graph.new_edge start e v)
+ (Graph.outgoing_edges dfa'.dfa_start);
+
+ (* The resulting DFA: *)
+ { dfa_graph = dfa'.dfa_graph;
+ dfa_start = start;
+ dfa_stops = VertexSet.union dfa'.dfa_stops
+ (VertexSet.singleton start);
+ dfa_null = true;
+ }
+ end
+
+ | Repeated1 re' ->
+ let dfa' = get_dfa re' in
+ List.iter
+ (fun (e, v') ->
+ VertexSet.iter
+ (fun v ->
+ Graph.new_edge v e v')
+ dfa'.dfa_stops
+ )
+ (Graph.outgoing_edges dfa'.dfa_start);
+
+ (* The resulting DFA: *)
+ { dfa_graph = dfa'.dfa_graph;
+ dfa_start = dfa'.dfa_start;
+ dfa_stops = dfa'.dfa_stops;
+ dfa_null = dfa'.dfa_null;
+ }
+
+ | Repeated re' ->
+ get_dfa (Optional (Repeated1 re'))
+
+ in
+ try
+ get_dfa re
+ with
+ Graph.Edge_not_unique -> raise Not_found
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/07/23 02:16:08 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dfa.mli b/helm/DEVEL/pxp/pxp/pxp_dfa.mli
new file mode 100644
index 000000000..515eacee1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_dfa.mli
@@ -0,0 +1,77 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Graph : sig
+ type graph
+ type vertex
+
+ (* A directed graph whose edges are marked with strings (= element types)
+ * and with the constraint that for a given vertex and a given element
+ * type the edge must be unique.
+ *)
+
+ exception Edge_not_unique
+
+ val create : unit -> graph
+ (* Creates an empty graph *)
+
+ val new_vertex : graph -> vertex
+ (* Adds a new vertex to the graph, and returns the vertex *)
+
+ val new_edge : vertex -> string -> vertex -> unit
+ (* new_edge v_from etype v_to:
+ * Adds a new edge from vertex v_from to vertex v_to, marked with
+ * etype.
+ * Raises Edge_not_unique if there is already an edge etype starting
+ * at v_from to a different vertex than v_to.
+ *)
+
+ val graph_of_vertex : vertex -> graph
+ (* Returns the graph the passed vertex is contained in. *)
+
+ val union : graph -> graph -> unit
+ (* union g1 g2:
+ * Moves the vertexes and edges found in g2 to g1.
+ * After that, g2 is empty again.
+ *)
+
+ val outgoing_edges : vertex -> (string * vertex) list
+ (* Returns the list of outgoing edges starting in the passed vertex *)
+
+ val follow_edge : vertex -> string -> vertex
+ (* Follows the edge starting in the passed vertex which is marked
+ * with the passed element type.
+ * Raises Not_found if there is no such edge.
+ *)
+
+ val ingoing_edges : vertex -> (vertex * string) list
+ (* Returns the list of ingoing edges ending in the passed vertex *)
+end
+
+module VertexSet : Set.S with type elt = Graph.vertex
+
+
+type dfa_definition =
+ { dfa_graph : Graph.graph;
+ dfa_start : Graph.vertex; (* Where the automaton starts *)
+ dfa_stops : VertexSet.t; (* Where the automaton may stop *)
+ dfa_null : bool; (* Whether dfa_start is a member of dfa_stops *)
+ }
+
+val dfa_of_regexp_content_model : Pxp_types.regexp_spec -> dfa_definition
+ (* Computes the DFA or raises Not_found if it does not exist *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/07/23 02:16:08 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_document.ml b/helm/DEVEL/pxp/pxp/pxp_document.ml
new file mode 100644
index 000000000..1f1d4cf68
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_document.ml
@@ -0,0 +1,1985 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_dtd
+open Pxp_aux
+open Pxp_dfa
+
+
+exception Skip
+
+type node_type =
+ T_element of string
+ | T_data
+ | T_super_root
+ | T_pinstr of string
+ | T_comment
+ | T_none
+ | T_attribute of string
+ | T_namespace of string
+;;
+
+
+class type ['node] extension =
+ object ('self)
+ method clone : 'self
+ method node : 'node
+ method set_node : 'node -> unit
+ end
+;;
+
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+ method extension : 'ext
+ method delete : unit
+ method parent : 'ext node
+ method root : 'ext node
+ method orphaned_clone : 'self
+ method orphaned_flat_clone : 'self
+ method add_node : ?force:bool -> 'ext node -> unit
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+ method node_position : int
+ method node_path : int list
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node -> unit) -> unit
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method nth_node : int -> 'ext node
+ method previous_node : 'ext node
+ method next_node : 'ext node
+ method set_nodes : 'ext node list -> unit
+ method data : string
+ method node_type : node_type
+ method position : (string * int * int)
+ method attribute : string -> att_value
+ method attribute_names : string list
+ method attribute_type : string -> att_type
+ method attributes : (string * Pxp_types.att_value) list
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ method id_attribute_name : string
+ method id_attribute_value : string
+ method idref_attribute_names : string list
+ method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ method attributes_as_nodes : 'ext node list
+ method set_comment : string option -> unit
+ method comment : string option
+ method dtd : dtd
+ method encoding : rep_encoding
+ method create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list -> 'ext node
+ method create_data : dtd -> string -> 'ext node
+ method local_validate : ?use_dfa:bool -> unit -> unit
+ method keep_always_whitespace_mode : unit
+ method write : output_stream -> encoding -> unit
+ method write_compact_as_latin1 : output_stream -> unit
+ method internal_adopt : 'ext node option -> int -> unit
+ method internal_set_pos : int -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : (string * int * int) ->
+ dtd -> string -> (string * string) list -> unit
+ method internal_init_other : (string * int * int) ->
+ dtd -> node_type -> unit
+ end
+;;
+
+type 'ext spec_table =
+ { mapping : (string, 'ext node) Hashtbl.t;
+ data_node : 'ext node;
+ default_element : 'ext node;
+ super_root_node : 'ext node option;
+ pinstr_mapping : (string, 'ext node) Hashtbl.t;
+ default_pinstr_node : 'ext node option;
+ comment_node : 'ext node option;
+ }
+;;
+
+type 'ext spec =
+ Spec_table of 'ext spec_table
+;;
+
+
+let make_spec_from_mapping
+      ?super_root_exemplar
+      ?comment_exemplar
+      ?default_pinstr_exemplar
+      ?pinstr_mapping
+      ~data_exemplar ~default_element_exemplar ~element_mapping () =
+  (* Builds a spec from hash tables of exemplars. When no table for
+   * processing instructions is passed, a fresh empty one is used. *)
+  let pi_table =
+    match pinstr_mapping with Some tbl -> tbl | None -> Hashtbl.create 1 in
+  Spec_table
+    { mapping = element_mapping;
+      pinstr_mapping = pi_table;
+      data_node = data_exemplar;
+      default_element = default_element_exemplar;
+      super_root_node = super_root_exemplar;
+      comment_node = comment_exemplar;
+      default_pinstr_node = default_pinstr_exemplar;
+    }
+;;
+
+
+let make_spec_from_alist
+      ?super_root_exemplar
+      ?comment_exemplar
+      ?default_pinstr_exemplar
+      ?(pinstr_alist = [])
+      ~data_exemplar ~default_element_exemplar ~element_alist () =
+  (* Like make_spec_from_mapping, but the element and processing
+   * instruction exemplars are given as association lists. Each hash
+   * table is sized by the length of the list it is filled from. *)
+  let pinstr_mapping = Hashtbl.create (List.length pinstr_alist) in
+  List.iter
+    (fun (name,ex) -> Hashtbl.add pinstr_mapping name ex)
+    pinstr_alist;
+  let element_mapping = Hashtbl.create (List.length element_alist) in
+  List.iter (fun (name,ex) -> Hashtbl.add element_mapping name ex)
+    element_alist;
+  make_spec_from_mapping
+    ?super_root_exemplar: super_root_exemplar
+    ?comment_exemplar: comment_exemplar
+    ?default_pinstr_exemplar: default_pinstr_exemplar
+    ~pinstr_mapping: pinstr_mapping
+    ~data_exemplar: data_exemplar
+    ~default_element_exemplar: default_element_exemplar
+    ~element_mapping: element_mapping
+    ()
+;;
+
+(**********************************************************************)
+
+exception Found;;
+
+let validate_content ?(use_dfa=None) model (el : 'a node) =
+ (* checks that the nodes of 'el' matches the DTD. Returns 'true'
+ * on success and 'false' on failure.
+ *)
+
+ let rec is_empty cl =
+ (* Whether the node list counts as empty or not. *)
+ match cl with
+ [] -> true
+ | n :: cl' ->
+ ( match n # node_type with
+ | T_element _ -> false
+ | _ -> is_empty cl' (* ignore other nodes *)
+ )
+ in
+
+ let rec run_regexp cl ml =
+ (* Validates regexp content models ml against instances cl. This
+ * function works for deterministic and non-deterministic models.
+ * The implementation uses backtracking and may sometimes be slow.
+ *
+ * cl: the list of children that will have to be matched
+ * ml: the list of regexps that will have to match (to be read as
+ * sequence)
+ * returns () meaning that no match has been found, or raises Found.
+ *)
+ match ml with
+ [] ->
+ if cl = [] then raise Found; (* Frequent case *)
+ if is_empty cl then raise Found; (* General condition *)
+ | Seq seq :: ml' ->
+ assert (seq <> []); (* necessary to ensure termination *)
+ run_regexp cl (seq @ ml')
+ | Alt alts :: ml' ->
+ let rec find alts =
+ match alts with
+ [] -> ()
+ | alt :: alts' ->
+ run_regexp cl (alt :: ml');
+ find alts'
+ in
+ assert (alts <> []); (* Alt [] matches nothing *)
+ find alts
+ | Repeated re :: ml' ->
+ let rec norm re = (* to avoid infinite loops *)
+ match re with
+ Repeated subre -> norm subre (* necessary *)
+ | Optional subre -> norm subre (* necessary *)
+ | Repeated1 subre -> norm subre (* an optimization *)
+ | _ -> re
+ in
+ let re' = norm re in
+ run_regexp cl (re' :: Repeated re' :: ml');
+ run_regexp cl ml'
+ | Repeated1 re :: ml' ->
+ run_regexp cl (re :: Repeated re :: ml')
+ | Optional re :: ml' ->
+ run_regexp cl (re :: ml');
+ run_regexp cl ml';
+ | Child chld :: ml' ->
+ match cl with
+ [] ->
+ ()
+ | sub_el :: cl' ->
+ begin match sub_el # node_type with
+ T_data -> (* Ignore data *)
+ run_regexp cl' ml
+ (* Note: It can happen that we find a data node here
+ * if the 'keep_always_whitespace' mode is turned on.
+ *)
+ | T_element nt ->
+ if nt = chld then run_regexp cl' ml'
+ | _ -> (* Ignore this element *)
+ run_regexp cl' ml
+ end
+ in
+
+ let run_dfa cl dfa =
+ (* Validates the instances cl against the automaton dfa. This
+ * function works ONLY for deterministic models.
+ * The implementation executes the automaton.
+ *)
+ let current_vertex = ref dfa.dfa_start in
+ let rec next_step cl =
+ match cl with
+ el :: cl' ->
+ begin match el # node_type with
+ T_data -> (* Ignore data *)
+ next_step cl'
+ (* Note: It can happen that we find a data node here
+ * if the 'keep_always_whitespace' mode is turned on.
+ *)
+ | T_element nt ->
+ begin try
+ current_vertex := Graph.follow_edge !current_vertex nt;
+ next_step cl'
+ with
+ Not_found -> false
+ end
+ | _ -> (* Ignore this node *)
+ next_step cl'
+ end
+ | [] ->
+ VertexSet.mem !current_vertex dfa.dfa_stops
+ in
+ next_step cl
+ in
+
+ match model with
+ Unspecified -> true
+ | Any -> true
+ | Empty ->
+ let cl = el # sub_nodes in
+ is_empty cl
+ | Mixed (MPCDATA :: mix) ->
+ let mix' = List.map (function
+ MPCDATA -> assert false
+ | MChild x -> x)
+ mix in
+ begin try
+ el # iter_nodes
+ (fun sub_el ->
+ let nt = sub_el # node_type in
+ match nt with
+ | T_element name ->
+ if not (List.mem name mix') then raise Not_found;
+ | _ -> ()
+ );
+ true
+ with
+ Not_found ->
+ false
+ end
+ | Regexp re ->
+ let cl = el # sub_nodes in
+ begin match use_dfa with
+ None ->
+ (* General backtracking implementation: *)
+ begin try
+ run_regexp cl [re];
+ false
+ with
+ Found -> true
+ end
+ | Some dfa ->
+ run_dfa cl dfa
+ end
+
+ | _ -> assert false
+;;
+
+(**********************************************************************)
+
+
+class virtual ['ext] node_impl an_ext =
+ object (self)
+ constraint 'ext = 'ext node #extension
+
+ val mutable parent = (None : 'ext node option)
+ val mutable node_position = -1
+ val mutable dtd = (None : dtd option)
+ val mutable extension = an_ext
+
+ initializer
+ extension # set_node (self : 'ext #node :> 'ext node)
+
+
+ method extension = (extension : 'ext)
+
+ method delete =
+ match parent with
+ None -> ()
+ | Some p -> p # internal_delete (self : 'ext #node :> 'ext node)
+
+ method parent =
+ match parent with
+ None -> raise Not_found
+ | Some p -> p
+
+ method root =
+ match parent with
+ None -> (self : 'ext #node :> 'ext node)
+ | Some p -> p # root
+
+ method node_position =
+ if node_position >= 0 then node_position else
+ raise Not_found
+
+ method node_path =
+ let rec collect n path =
+ try
+ let p = n # node_position in
+ collect (n # parent) (p :: path)
+ with
+ Not_found ->
+ (* n is the root *)
+ path
+ in
+ collect (self : 'ext #node :> 'ext node) []
+
+ method previous_node =
+ self # parent # nth_node (self # node_position - 1)
+
+ method next_node =
+ self # parent # nth_node (self # node_position + 1)
+
+ method orphaned_clone =
+ let x = extension # clone in
+ let n =
+ {< parent = None;
+ node_position = -1;
+ extension = x;
+ >} in
+ x # set_node (n : 'ext #node :> 'ext node);
+ n
+
+ method orphaned_flat_clone =
+ let x = extension # clone in
+ let n =
+ {< parent = None;
+ node_position = -1;
+ extension = x;
+ >} in
+ x # set_node (n : 'ext #node :> 'ext node);
+ n
+
+ method dtd =
+ match dtd with
+ None -> failwith "Pxp_document.node_impl#dtd: No DTD available"
+ | Some d -> d
+
+ method encoding =
+ match dtd with
+ None -> failwith "Pxp_document.node_impl#encoding: No DTD available"
+ | Some d -> d # encoding
+
+ method internal_adopt (new_parent : 'ext node option) pos =
+ begin match parent with
+ None -> ()
+ | Some p ->
+ if new_parent <> None then
+ failwith "Pxp_document.node_impl#internal_adopt: Tried to add a bound element"
+ end;
+ parent <- new_parent;
+ node_position <- pos
+
+ method internal_set_pos pos =
+ node_position <- pos
+
+ method virtual add_node : ?force:bool -> 'ext node -> unit
+ method virtual add_pinstr : proc_instruction -> unit
+ method virtual sub_nodes : 'ext node list
+ method virtual pinstr : string -> proc_instruction list
+ method virtual pinstr_names : string list
+ method virtual iter_nodes : ('ext node -> unit) -> unit
+ method virtual iter_nodes_sibl : ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method virtual nth_node : int -> 'ext node
+ method virtual set_nodes : 'ext node list -> unit
+ method virtual data : string
+ method virtual node_type : node_type
+ method virtual position : (string * int * int)
+ method virtual attribute : string -> att_value
+ method virtual attribute_names : string list
+ method virtual attribute_type : string -> att_type
+ method virtual attributes : (string * Pxp_types.att_value) list
+ method virtual required_string_attribute : string -> string
+ method virtual required_list_attribute : string -> string list
+ method virtual optional_string_attribute : string -> string option
+ method virtual optional_list_attribute : string -> string list
+ method virtual quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ method virtual attributes_as_nodes : 'ext node list
+ method virtual set_comment : string option -> unit
+ method virtual comment : string option
+ method virtual create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list -> 'ext node
+ method virtual create_data : dtd -> string -> 'ext node
+ method virtual keep_always_whitespace_mode : unit
+ method virtual write : output_stream -> encoding -> unit
+ method virtual write_compact_as_latin1 : output_stream -> unit
+ method virtual local_validate : ?use_dfa:bool -> unit -> unit
+ method virtual internal_delete : 'ext node -> unit
+ method virtual internal_init : (string * int * int) ->
+ dtd -> string -> (string * string) list -> unit
+ method virtual internal_init_other : (string * int * int) ->
+ dtd -> node_type -> unit
+ end
+;;
+
+
+(**********************************************************************)
+
+let no_position = ("?", 0, 0) ;;
+
+
+class ['ext] data_impl an_ext : ['ext] node =
+ object (self)
+ inherit ['ext] node_impl an_ext
+ val mutable content = ("" : string)
+
+ method position = no_position
+
+ method add_node ?(force=false) _ =
+ failwith "method 'add_node' not applicable to data node"
+ method add_pinstr _ =
+ failwith "method 'add_pinstr' not applicable to data node"
+ method pinstr _ = []
+ method pinstr_names = []
+ method sub_nodes = []
+ method iter_nodes _ = ()
+ method iter_nodes_sibl _ = ()
+ method nth_node _ = raise Not_found
+ method set_nodes _ =
+ failwith "method 'set_nodes' not applicable to data node"
+ method data = content
+ method node_type = T_data
+ method attribute _ = raise Not_found
+ method attribute_names = []
+ method attribute_type _ = raise Not_found
+ method attributes = []
+ method required_string_attribute _ =
+ failwith "Markup.document, method required_string_attribute: not found"
+ method required_list_attribute _ =
+ failwith "Markup.document, method required_list_attribute: not found"
+ method optional_string_attribute _ = None
+ method optional_list_attribute _ = []
+ method id_attribute_name = raise Not_found
+ method id_attribute_value = raise Not_found
+ method idref_attribute_names = []
+ method quick_set_attributes _ =
+ failwith "method 'quick_set_attributes' not applicable to data node"
+ method attributes_as_nodes = []
+ method comment = None
+ method set_comment c =
+ match c with
+ None -> ()
+ | Some _ -> failwith "method 'set_comment' not applicable to data node"
+ method create_element ?position _ _ _ =
+ failwith "method 'create_element' not applicable to data node"
+ method create_data new_dtd new_str =
+ let x = extension # clone in
+ let n =
+ ( {< parent = None;
+ extension = x;
+ dtd = Some new_dtd;
+ content = new_str;
+ >}
+ : 'ext #node :> 'ext node) in
+ x # set_node n;
+ n
+ method local_validate ?use_dfa () = ()
+ method keep_always_whitespace_mode = ()
+
+
+ method write os enc =
+ let encoding = self # encoding in
+ write_data_string ~from_enc:encoding ~to_enc:enc os content
+
+
+ method write_compact_as_latin1 os =
+ self # write os `Enc_iso88591
+
+ method internal_delete _ =
+ assert false
+ method internal_init _ _ _ _ =
+ assert false
+ method internal_init_other _ _ _ =
+ assert false
+ end
+;;
+
+
+(**********************************************************************)
+
+class ['ext] attribute_impl ~element ~name value dtd =
+ (object (self)
+ val mutable parent = (None : 'ext node option)
+ val mutable dtd = dtd
+ val mutable element_name = element
+ val mutable att_name = name
+ val mutable att_value = value
+
+ method parent =
+ match parent with
+ None -> raise Not_found
+ | Some p -> p
+
+ method root =
+ match parent with
+ None -> (self : 'ext #node :> 'ext node)
+ | Some p -> p # root
+
+ method internal_adopt new_parent _ =
+ parent <- new_parent
+
+ method orphaned_clone =
+ {< parent = None >}
+
+ method orphaned_flat_clone =
+ {< parent = None >}
+
+ method dtd = dtd
+
+ method encoding = dtd # encoding
+
+ method node_type = T_attribute att_name
+
+ method attribute n =
+ if n = att_name then att_value else raise Not_found
+
+ method attribute_names = [ att_name ]
+
+ method attribute_type n =
+ let eltype = dtd # element element_name in
+ ( try
+ let atype, adefault = eltype # attribute n in
+ atype
+ with
+ Undeclared ->
+ A_cdata
+ )
+
+ method attributes = [ att_name, att_value ]
+
+ method required_string_attribute n =
+ if n = att_name then
+ match att_value with
+ Value s -> s
+ | Valuelist l -> String.concat " " l
+ | Implied_value -> raise Not_found
+ else
+ failwith "Pxp_document.attribute_impl#required_string_attribute: not found"
+
+
+ method required_list_attribute n =
+ if n = att_name then
+ match att_value with
+ Value s -> [ s ]
+ | Valuelist l -> l
+ | Implied_value -> raise Not_found
+ else
+ failwith "Pxp_document.attribute_impl#required_list_attribute: not found"
+
+ method optional_string_attribute n =
+ if n = att_name then
+ match att_value with
+ Value s -> Some s
+ | Valuelist l -> Some(String.concat " " l)
+ | Implied_value -> None
+ else
+ None
+
+ method optional_list_attribute n =
+ if n = att_name then
+ match att_value with
+ Value s -> [ s ]
+ | Valuelist l -> l
+ | Implied_value -> []
+ else
+ []
+
+ (* Senseless methods: *)
+
+ method sub_nodes = []
+ method pinstr _ = []
+ method pinstr_names = []
+ method iter_nodes _ = ()
+ method iter_nodes_sibl _ = ()
+ method nth_node _ = raise Not_found
+ method data = ""
+ method position = ("?",0,0)
+ method comment = None
+ method local_validate ?use_dfa () = ()
+
+ (* Non-applicable methods: *)
+
+ method extension =
+ failwith "Pxp_document.attribute_impl#extension: not applicable"
+ method delete =
+ failwith "Pxp_document.attribute_impl#delete: not applicable"
+ method node_position =
+ failwith "Pxp_document.attribute_impl#node_position: not applicable"
+ method node_path =
+ failwith "Pxp_document.attribute_impl#node_path: not applicable"
+ method previous_node =
+ failwith "Pxp_document.attribute_impl#previous_node: not applicable"
+ method next_node =
+ failwith "Pxp_document.attribute_impl#next_node: not applicable"
+ method internal_set_pos _ =
+ failwith "Pxp_document.attribute_impl#internal_set_pos: not applicable"
+ method internal_delete _ =
+ failwith "Pxp_document.attribute_impl#internal_delete: not applicable"
+ method internal_init _ _ _ _ =
+ failwith "Pxp_document.attribute_impl#internal_init: not applicable"
+ method internal_init_other _ _ _ =
+ failwith "Pxp_document.attribute_impl#internal_init_other: not applicable"
+ method add_node ?force _ =
+ failwith "Pxp_document.attribute_impl#add_node: not applicable"
+ method add_pinstr _ =
+ failwith "Pxp_document.attribute_impl#add_pinstr: not applicable"
+ method set_nodes _ =
+ failwith "Pxp_document.attribute_impl#set_nodes: not applicable"
+ method quick_set_attributes _ =
+ failwith "Pxp_document.attribute_impl#quick_set_attributes: not applicable"
+ method attributes_as_nodes =   (* not applicable here: always fails *)
+   failwith "Pxp_document.attribute_impl#attributes_as_nodes: not applicable"
+ method set_comment c =
+ if c <> None then
+ failwith "Pxp_document.attribute_impl#set_comment: not applicable"
+ method create_element ?position _ _ _ =
+ failwith "Pxp_document.attribute_impl#create_element: not applicable"
+ method create_data _ _ =
+ failwith "Pxp_document.attribute_impl#create_data: not applicable"
+ method keep_always_whitespace_mode =
+ failwith "Pxp_document.attribute_impl#keep_always_whitespace_mode: not applicable"
+ method write _ _ =
+ failwith "Pxp_document.attribute_impl#write: not applicable"
+ method write_compact_as_latin1 _ =
+ failwith "Pxp_document.attribute_impl#write_compact_as_latin1: not applicable"
+ method id_attribute_name =
+ failwith "Pxp_document.attribute_impl#id_attribute_name: not applicable"
+ method id_attribute_value =
+ failwith "Pxp_document.attribute_impl#id_attribute_value: not applicable"
+ method idref_attribute_names =
+ failwith "Pxp_document.attribute_impl#idref_attribute_names: not applicable"
+ end
+ : ['ext] node)
+;;
+
+(**********************************************************************)
+
+class ['ext] element_impl an_ext : ['ext] node =
+ object (self:'self)
+ inherit ['ext] node_impl an_ext as super
+
+ val mutable content_model = Any
+ val mutable content_dfa = lazy None
+ val mutable ext_decl = false
+ val mutable ntype = T_none
+ val mutable id_att_name = None
+ val mutable idref_att_names = []
+ val mutable rev_nodes = ([] : 'c list)
+ val mutable nodes = (None : 'c list option)
+ val mutable array = (None : 'c array option)
+ val mutable size = 0
+ val mutable attributes = []
+ val mutable att_nodes = []
+ val mutable comment = None
+ val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
+ val mutable keep_always_whitespace = false
+
+ val mutable position = no_position
+
+ method comment = comment
+
+ method set_comment c =
+ if ntype = T_comment then
+ comment <- c
+ else
+ failwith "set_comment: not applicable to node types other than T_comment"
+
+ method attributes = attributes
+
+ method position = position
+
+ method private error_name =
+ match ntype with
+ T_element n -> "Element `" ^ n ^ "'"
+ | T_super_root -> "Super root"
+ | T_pinstr n -> "Wrapper element for processing instruction `" ^ n ^
+ "'"
+ | T_comment -> "Wrapper element for comment"
+ | T_none -> "NO element"
+ | T_attribute _ -> assert false
+ | T_namespace _ -> assert false
+ | T_data -> assert false
+
+ (* Appends the node "n" to the children of this element.
+ * If this node has a DTD, "n" must refer to the physically same DTD
+ * object; otherwise Failure is raised.
+ * For data nodes the content model is consulted unless ~force is true:
+ * - Empty: non-empty data raises Validation_error; empty data is
+ * silently dropped (the node is not added)
+ * - Regexp: the data must consist of whitespace only; such a node is
+ * dropped unless keep_always_whitespace is set
+ * - Any, Unspecified, Mixed: no check
+ * Nodes of all other types are added without content checks.
+ *)
+ method add_node ?(force = false) n =
+ let only_whitespace s =
+ (* Checks that the string "s" contains only whitespace. On failure,
+ * Validation_error is raised.
+ *)
+ let l = String.length s in
+ if l < 100 then begin
+ for i=0 to l - 1 do (* for loop is faster for small 'l' *)
+ match s.[i] with
+ ('\009'|'\010'|'\013'|'\032') -> ()
+ | _ ->
+ raise(Validation_error(self # error_name ^
+ " must not have character contents"));
+ done
+ end
+ else begin
+ (* Long strings are checked with the lexer: the first token must be
+ * Ignore and the next one Eof.
+ * NOTE(review): presumably Ignore stands for a run of whitespace --
+ * confirm against the lexer definition.
+ *)
+ let lexbuf = Lexing.from_string s in
+ let lexerset = Pxp_lexers.get_lexer_set (self # dtd # encoding) in
+ let t = lexerset.scan_name_string lexbuf in
+ if t <> Ignore or
+ (lexerset.scan_name_string lexbuf <> Eof)
+ then
+ raise(Validation_error(self # error_name ^
+ " must not have character contents"));
+ ()
+ end
+ in
+ (* general DTD check: *)
+ begin match dtd with
+ None -> ()
+ | Some d -> if n # dtd != d then
+ failwith "Pxp_document.element_impl # add_node: the sub node has a different DTD";
+ end;
+ (* specific checks: *)
+ try
+ begin match n # node_type with
+ T_data ->
+ begin match content_model with
+ Any -> ()
+ | Unspecified -> ()
+ | Empty ->
+ if not force then begin
+ if n # data <> "" then
+ raise(Validation_error(self # error_name ^
+ " must be empty"));
+ (* empty data in an EMPTY element: do not add the node *)
+ raise Skip
+ end
+ | Mixed _ -> ()
+ | Regexp _ ->
+ if not force then begin
+ only_whitespace (n # data);
+ (* TODO: following check faster *)
+ if n # dtd # standalone_declaration &&
+ n # data <> ""
+ then begin
+ (* The standalone declaration is violated if the
+ * element declaration is contained in an external
+ * entity.
+ *)
+ if ext_decl then
+ raise
+ (Validation_error
+ (self # error_name ^
+ " violates standalone declaration" ^
+ " because extra white space separates" ^
+ " the sub elements"));
+ end;
+ (* ignorable whitespace is only kept on request *)
+ if not keep_always_whitespace then raise Skip
+ end
+ end
+ | _ ->
+ ()
+ end;
+ (* all OK, so add this node: *)
+ n # internal_adopt (Some (self : 'ext #node :> 'ext node)) size;
+ rev_nodes <- n :: rev_nodes;
+ (* the cached list and array of children are now out of date *)
+ nodes <- None;
+ array <- None;
+ size <- size + 1
+ with Skip ->
+ (* the node was intentionally not added *)
+ ()
+
+ method add_pinstr pi =
+   (* A processing instruction can only be stored if its encoding agrees
+    * with the encoding of the DTD (when there is a DTD).
+    *)
+   ( match dtd with
+       Some d when pi # encoding <> d # encoding ->
+         failwith "Pxp_document.element_impl # add_pinstr: Inconsistent encodings"
+     | _ ->
+         ()
+   );
+   let target = pi # target in
+   let table = Lazy.force pinstr in
+   Hashtbl.add table target pi
+
+ method pinstr name =
+   (* All processing instructions that were added under the target "name" *)
+   let table = Lazy.force pinstr in
+   Hashtbl.find_all table name
+
+ method pinstr_names =
+   (* Collects the key of every binding of the table; a target that was
+    * added several times is therefore listed several times.
+    *)
+   let names = ref [] in
+   let collect target _ = (names := target :: !names) in
+   Hashtbl.iter collect (Lazy.force pinstr);
+   !names
+
+ method sub_nodes =
+   (* The children in document order. The list is derived from the
+    * reversed list "rev_nodes" on first use and then cached in "nodes".
+    *)
+   match nodes with
+     Some cached ->
+       cached
+   | None ->
+       let ordered = List.rev rev_nodes in
+       nodes <- Some ordered;
+       ordered
+
+ method iter_nodes f =
+   (* Applies "f" to every child, in document order *)
+   List.iter f (self # sub_nodes)
+
+ (* Calls "f pred x succ" for every child "x" in document order, where
+ * "pred" and "succ" are the neighbours of "x" among the children
+ * (None at the beginning resp. the end of the list).
+ *)
+ method iter_nodes_sibl f =
+   let cl = self # sub_nodes in
+   let rec next last_node l =
+     match l with
+       [] -> ()
+     | [x] ->
+         f last_node x None
+     | x :: ((y :: _) as rest) ->
+         f last_node x (Some y);
+         (* Continue with the list that still begins with "y": "y" must
+          * be visited itself, not only be reported as successor of "x".
+          *)
+         next (Some x) rest
+   in
+   next None cl
+
+ method nth_node p =
+   (* Returns the child at position "p" (counting from 0); raises
+    * Not_found if "p" is out of range. The array of children is built
+    * on first use and cached.
+    *)
+   if p < 0 or p >= size then raise Not_found;
+   let a =
+     match array with
+       Some a ->
+         a
+     | None ->
+         let fresh = Array.of_list (self # sub_nodes) in
+         array <- Some fresh;
+         fresh
+   in
+   a.(p)
+
+ (* Replaces all children of this element by the nodes of the list "nl".
+ * First every current child is released (internal_adopt None), then the
+ * members of "nl" are adopted one after another with the positions
+ * 0, 1, 2, ... If an adoption fails, the method tries to restore the
+ * old children and re-raises the exception.
+ *)
+ method set_nodes nl =
+ let old_size = size in
+ (* release the current children *)
+ List.iter
+ (fun n -> n # internal_adopt None (-1))
+ rev_nodes;
+ begin try
+ (* "size" serves as position counter while the new nodes are adopted *)
+ size <- 0;
+ List.iter
+ (fun n -> n # internal_adopt
+ (Some (self : 'ext #node :> 'ext node))
+ size;
+ size <- size + 1)
+ nl
+ with
+ e ->
+ (* revert action as much as possible *)
+ List.iter
+ (fun n -> n # internal_adopt None (-1))
+ rev_nodes;
+ size <- old_size;
+ (* rev_nodes is in reverse order, so the positions count downwards *)
+ let pos = ref (size-1) in
+ List.iter
+ (fun n -> n # internal_adopt
+ (Some (self : 'ext #node :> 'ext node))
+ !pos;
+ decr pos
+ )
+ rev_nodes;
+ (* [TODO] Note: there may be bad members in nl *)
+ raise e
+ end;
+ rev_nodes <- List.rev nl;
+ (* invalidate the cached views of the children *)
+ array <- None;
+ nodes <- None
+
+
+ (* Returns a copy of this node together with copies of all its
+ * children (each obtained by calling orphaned_clone on the child).
+ * The copy has no parent; its extension is a clone of the extension
+ * of this node.
+ *)
+ method orphaned_clone : 'self =
+ (* the clones are in the same (reversed) order as rev_nodes *)
+ let sub_clones =
+ List.map
+ (fun m ->
+ m # orphaned_clone)
+ rev_nodes
+ in
+
+ let x = extension # clone in
+ let n =
+ {< parent = None;
+ node_position = -1;
+ extension = x;
+ rev_nodes = sub_clones;
+ nodes = None;
+ array = None;
+ >} in
+
+ (* make the copy the parent of the cloned children; the list is
+ * reversed, so the positions count downwards from size - 1
+ *)
+ let pos = ref (size - 1) in
+ List.iter
+ (fun m -> m # internal_adopt
+ (Some (n : 'ext #node :> 'ext node))
+ !pos;
+ decr pos
+ )
+ sub_clones;
+
+ (* tell the cloned extension which node it belongs to *)
+ x # set_node (n : 'ext #node :> 'ext node);
+ n
+
+ (* Returns a copy of this node that has neither a parent nor children.
+ * The extension of the copy is a clone of the extension of this node.
+ *)
+ method orphaned_flat_clone : 'self =
+ let x = extension # clone in
+ let n =
+ {< parent = None;
+ node_position = -1;
+ extension = x;
+ rev_nodes = [];
+ nodes = None;
+ size = 0;
+ array = None;
+ >} in
+
+ (* tell the cloned extension which node it belongs to *)
+ x # set_node (n : 'ext #node :> 'ext node);
+ n
+
+
+ (* Removes the child "n" (compared by physical identity) from the list
+ * of children, renumbers the remaining children, and releases "n"
+ * (internal_adopt None).
+ *)
+ method internal_delete n =
+   rev_nodes <- List.filter (fun n' -> n' != n) rev_nodes;
+   size <- size - 1;
+   (* rev_nodes is in reverse order, so the positions count downwards *)
+   let p = ref (size-1) in
+   List.iter
+     (fun n' -> n' # internal_set_pos !p; decr p)
+     rev_nodes;
+   (* Both cached views of the children are out of date now. Without
+    * resetting "array", nth_node would go on returning the deleted node
+    * (or a node at a shifted position).
+    *)
+   nodes <- None;
+   array <- None;
+   n # internal_adopt None (-1);
+
+
+ method data =
+   (* The concatenation of the data strings of all children *)
+   let pieces = List.map (fun n -> n # data) (self # sub_nodes) in
+   String.concat "" pieces
+
+ (* The node type that was stored when this node was initialized *)
+ method node_type = ntype
+
+
+ (* Returns the value of the attribute "n". List.assoc raises Not_found
+ * if the attribute list has no entry for "n".
+ *)
+ method attribute n =
+ List.assoc n attributes
+
+ (* The names of all entries of the attribute list, in list order *)
+ method attribute_names =
+ List.map fst attributes
+
+ method attribute_type n =
+   (* The declared type of the attribute "n" of this element; attributes
+    * without declaration are reported as A_cdata. Fails for nodes that
+    * are not elements.
+    *)
+   match ntype with
+     T_element name ->
+       let d =
+         match dtd with
+           Some d -> d
+         | None -> assert false in
+       let eltype = d # element name in
+       ( try fst (eltype # attribute n)
+         with Undeclared -> A_cdata
+       )
+   | _ ->
+       failwith "attribute_type: not available for non-element nodes"
+
+
+ method required_string_attribute n =
+   (* The value of the attribute "n" as a single string (list values are
+    * joined with blanks). A missing or implied attribute is a failure.
+    *)
+   let found =
+     try Some (List.assoc n attributes) with Not_found -> None in
+   match found with
+     Some (Value s) -> s
+   | Some (Valuelist l) -> String.concat " " l
+   | Some Implied_value | None ->
+       failwith "Pxp_document, method required_string_attribute: not found"
+
+ method optional_string_attribute n =
+   (* Like required_string_attribute, but a missing or implied attribute
+    * yields None instead of a failure.
+    *)
+   let found =
+     try Some (List.assoc n attributes) with Not_found -> None in
+   match found with
+     Some (Value s) -> Some s
+   | Some (Valuelist l) -> Some (String.concat " " l)
+   | Some Implied_value | None -> None
+
+ (* The value of the attribute "n" as a list of strings (a single value
+ * becomes a one-element list). A missing or implied attribute is a
+ * failure.
+ *)
+ method required_list_attribute n =
+   try
+     match List.assoc n attributes with
+       Value s -> [ s ]
+     | Valuelist l -> l
+     | Implied_value -> raise Not_found
+   with
+     Not_found ->
+       (* The message names the module Pxp_document like the one of
+        * required_string_attribute (it still said "Markup.document").
+        *)
+       failwith "Pxp_document, method required_list_attribute: not found"
+
+ method optional_list_attribute n =
+   (* Like required_list_attribute, but a missing or implied attribute
+    * yields the empty list instead of a failure.
+    *)
+   let found =
+     try Some (List.assoc n attributes) with Not_found -> None in
+   match found with
+     Some (Value s) -> [ s ]
+   | Some (Valuelist l) -> l
+   | Some Implied_value | None -> []
+
+ method id_attribute_name =
+   (* The stored name of the ID attribute; Not_found if there is none *)
+   match id_att_name with
+     Some name -> name
+   | None -> raise Not_found
+
+ method id_attribute_value =
+   (* The string value of the ID attribute. Not_found is raised if no ID
+    * attribute name is stored, if the attribute is not in the attribute
+    * list, or if its value is not a single string.
+    *)
+   let name =
+     match id_att_name with
+       Some name -> name
+     | None -> raise Not_found in
+   match List.assoc name attributes with (* may raise Not_found *)
+     Value s -> s
+   | _ -> raise Not_found
+
+
+ (* Returns the stored list idref_att_names. internal_init takes it from
+ * the element declaration; internal_init_other sets it to the empty
+ * list.
+ *)
+ method idref_attribute_names = idref_att_names
+
+
+ method quick_set_attributes atts =
+   (* Replaces the attribute list without any check and discards the
+    * attribute nodes derived from the old list. Only for element nodes.
+    *)
+   let is_element =
+     match ntype with
+       T_element _ -> true
+     | _ -> false in
+   if not is_element then
+     failwith "quick_set_attributes: not applicable for non-element node";
+   attributes <- atts;
+   att_nodes <- []
+
+
+ method attributes_as_nodes =
+   (* Returns one attribute node per entry of the attribute list. The
+    * nodes are created on first demand and then kept in "att_nodes".
+    *)
+   match att_nodes with
+     _ :: _ ->
+       att_nodes
+   | [] ->
+       if attributes = [] then
+         []
+       else begin
+         let dtd = self # dtd in
+         let element_name =
+           match ntype with
+             T_element n -> n
+           | _ -> assert false in
+         let l =
+           List.map
+             (fun (n,v) ->
+                new attribute_impl ~element:element_name ~name:n v dtd)
+             attributes in
+         att_nodes <- l;
+         l
+       end
+
+
+ (* Creates a new node using this object as exemplar: the object is
+ * copied with a cloned extension, without parent and with a fresh
+ * table of processing instructions, and the copy is then initialized
+ * according to "new_type":
+ * - T_element: by internal_init (validates the attribute list)
+ * - T_comment, T_pinstr, T_super_root, T_none: by internal_init_other
+ * - T_data and all other types: Failure
+ *)
+ method create_element
+ ?(position = no_position) new_dtd new_type new_attlist =
+ let x = extension # clone in
+ let obj = ( {< parent = None;
+ extension = x;
+ pinstr = lazy (Hashtbl.create 10)
+ >}
+ : 'ext #node :> 'ext node
+ ) in
+ (* tell the cloned extension which node it belongs to *)
+ x # set_node obj;
+ match new_type with
+ T_data ->
+ failwith "create_element: Cannot create T_data node"
+ | T_element name ->
+ obj # internal_init position new_dtd name new_attlist;
+ obj
+ | (T_comment | T_pinstr _ | T_super_root | T_none) ->
+ obj # internal_init_other position new_dtd new_type;
+ obj
+ | _ ->
+ failwith "create_element: Cannot create such node"
+
+
+ (* Initializes this object as a node of the type "new_ntype" (used by
+ * create_element for comment, processing instruction, super root and
+ * "none" nodes): the node gets no parent, no children, no attributes,
+ * no comment, and the content model Any.
+ *)
+ method internal_init_other new_pos new_dtd new_ntype =
+   (* resets the contents of the object *)
+   parent <- None;
+   rev_nodes <- [];
+   nodes <- None;
+   (* The counter and the array cache must be reset together with the
+    * list of children; otherwise a copy of an exemplar that has
+    * children would report a wrong size and return stale nodes from
+    * nth_node.
+    *)
+   size <- 0;
+   array <- None;
+   ntype <- new_ntype;
+   position <- new_pos;
+   content_model <- Any;
+   content_dfa <- lazy None;
+   attributes <- [];
+   att_nodes <- [];
+   dtd <- Some new_dtd;
+   ext_decl <- false;
+   id_att_name <- None;
+   idref_att_names <- [];
+   comment <- None;
+
+
+ (* Initializes this object as element node with the name "new_name"
+ * and the attribute list "new_attlist" (pairs of name and unparsed
+ * value).
+ * If the element is declared in "new_dtd", the attributes are
+ * normalized and validated against the declaration, and declared
+ * attributes that are missing are added with their default values.
+ * If looking up the declaration raises Undeclared, the attributes are
+ * taken as they are, and the content model is Any.
+ * Raises WF_error if an attribute occurs twice, and Validation_error
+ * if a validity constraint is violated.
+ *)
+ method internal_init new_pos new_dtd new_name new_attlist =
+ (* ONLY FOR T_Element NODES!!! *)
+ (* resets the contents of the object *)
+ parent <- None;
+ rev_nodes <- [];
+ nodes <- None;
+ (* The counter and the array cache belong to the list of children and
+ * must be reset with it; otherwise a copy of an exemplar that has
+ * children would report a wrong size and return stale nodes from
+ * nth_node.
+ *)
+ size <- 0;
+ array <- None;
+ ntype <- T_element new_name;
+ position <- new_pos;
+ comment <- None;
+ att_nodes <- [];
+
+ let lexerset = Pxp_lexers.get_lexer_set (new_dtd # encoding) in
+ let sadecl = new_dtd # standalone_declaration in
+
+ (* First validate the element name and the attributes: *)
+ (* Well-Formedness Constraint: Unique Att Spec *)
+ let rec check_uniqueness al =
+ match al with
+ [] -> ()
+ | (n, av) :: al' ->
+ if List.mem_assoc n al' then
+ raise (WF_error("Attribute `" ^ n ^ "' occurs twice in element `" ^ new_name ^ "'"));
+ check_uniqueness al'
+ in
+ check_uniqueness new_attlist;
+ (* Validity Constraint: Element Valid [element has been declared] *)
+ try
+ let eltype = new_dtd # element new_name in
+ content_model <- eltype # content_model;
+ content_dfa <- lazy(eltype # content_dfa);
+ ext_decl <- eltype # externally_declared;
+ id_att_name <- eltype # id_attribute_name;
+ idref_att_names <- eltype # idref_attribute_names;
+ (* Validity Constraint: Attribute Value Type *)
+ (* Validity Constraint: Fixed Attribute Default *)
+ (* Validity Constraint: Standalone Document Declaration (partly) *)
+ let undeclared_attlist = ref [] in
+ let new_attlist' =
+ List.map
+ (fun (n,v) ->
+ try
+ (* Get type, default, and the normalized attribute
+ * value 'av':
+ *)
+ let atype, adefault = eltype # attribute n in
+ let av = value_of_attribute lexerset new_dtd n atype v in
+ (* If necessary, check whether normalization violates
+ * the standalone declaration.
+ *)
+ if sadecl &&
+ eltype #
+ attribute_violates_standalone_declaration n (Some v)
+ then
+ raise
+ (Validation_error
+ ("Attribute `" ^ n ^ "' of element type `" ^
+ new_name ^ "' violates standalone declaration"));
+ (* If the default is "fixed", check that. *)
+ begin match adefault with
+ (D_required | D_implied) -> ()
+ | D_default _ -> ()
+ | D_fixed u ->
+ let uv = value_of_attribute
+ lexerset new_dtd "[default]" atype u in
+ if av <> uv then
+ raise
+ (Validation_error
+ ("Attribute `" ^ n ^
+ "' is fixed, but has here a different value"));
+ end;
+ n,av
+ with
+ Undeclared ->
+ (* raised by method "# attribute" *)
+ undeclared_attlist :=
+ (n, value_of_attribute lexerset new_dtd n A_cdata v) ::
+ !undeclared_attlist;
+ n, Implied_value (* does not matter *)
+ )
+ new_attlist in
+ (* Validity Constraint: Required Attribute *)
+ (* Validity Constraint: Standalone Document Declaration (partly) *)
+ (* Add attributes with default values *)
+ let new_attlist'' =
+ List.map
+ (fun n ->
+ try
+ n, List.assoc n new_attlist'
+ with
+ Not_found ->
+ (* Check standalone declaration: *)
+ if sadecl &&
+ eltype #
+ attribute_violates_standalone_declaration
+ n None then
+ raise
+ (Validation_error
+ ("Attribute `" ^ n ^ "' of element type `" ^
+ new_name ^ "' violates standalone declaration"));
+ (* add default value or Implied *)
+ let atype, adefault = eltype # attribute n in
+ match adefault with
+ D_required ->
+ raise(Validation_error("Required attribute `" ^ n ^ "' is missing"))
+ | D_implied ->
+ n, Implied_value
+ | D_default v ->
+ n, value_of_attribute lexerset new_dtd n atype v
+ | D_fixed v ->
+ n, value_of_attribute lexerset new_dtd n atype v
+ )
+ (eltype # attribute_names)
+ in
+ dtd <- Some new_dtd;
+ (* the declared attributes come first, then the undeclared ones *)
+ attributes <- new_attlist'' @ !undeclared_attlist;
+ with
+ Undeclared ->
+ (* The DTD allows arbitrary attributes/contents for this
+ * element
+ *)
+ dtd <- Some new_dtd;
+ attributes <- List.map (fun (n,v) -> n, Value v) new_attlist;
+ (* There is no declaration, so nothing of the declaration-dependent
+ * state of the exemplar may survive (same values as in
+ * internal_init_other).
+ *)
+ ext_decl <- false;
+ id_att_name <- None;
+ idref_att_names <- [];
+ content_model <- Any;
+ content_dfa <- lazy None;
+
+ method local_validate ?(use_dfa=false) () =
+   (* Checks the children of this element against its content model and
+    * raises Validation_error on mismatch. The DFA of the content model
+    * is only handed to the checker if ~use_dfa is true.
+    *)
+   let dfa =
+     if use_dfa then Lazy.force content_dfa else None in
+   let matches =
+     validate_content
+       ~use_dfa:dfa
+       content_model
+       (self : 'ext #node :> 'ext node) in
+   if not matches then
+     raise(Validation_error(self # error_name ^
+                            " does not match its content model"))
+
+
+ (* An element node cannot create data nodes: this method always fails *)
+ method create_data _ _ =
+ failwith "method 'create_data' not applicable to element node"
+
+ (* Sets the flag keep_always_whitespace. add_node consults this flag
+ * before it drops a whitespace-only data node of an element with a
+ * regexp content model.
+ *)
+ method keep_always_whitespace_mode =
+ keep_always_whitespace <- true
+
+ (* Writes this node and its subtree to "os", converting from the
+ * internal encoding to the encoding "enc".
+ * For element nodes a start tag with all attributes that are not
+ * implied is written first and an end tag last; in between (and as
+ * the only output for the other node types) the processing
+ * instructions of this node and then the children are written.
+ *)
+ method write os enc =
+ let encoding = self # encoding in
+ let wms =
+ write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+ begin match ntype with
+ T_element name ->
+ wms ("<" ^ name);
+ List.iter
+ (fun (aname, avalue) ->
+ match avalue with
+ Implied_value -> ()
+ | Value v ->
+ wms ("\n" ^ aname ^ "=\"");
+ write_data_string ~from_enc:encoding ~to_enc:enc os v;
+ wms "\"";
+ | Valuelist l ->
+ (* list values are written as one blank-separated string *)
+ let v = String.concat " " l in
+ wms ("\n" ^ aname ^ "=\"");
+ write_data_string ~from_enc:encoding ~to_enc:enc os v;
+ wms "\"";
+ )
+ attributes;
+ wms "\n>";
+ | _ ->
+ ()
+ end;
+
+ Hashtbl.iter
+ (fun n pi ->
+ pi # write os enc
+ )
+ (Lazy.force pinstr);
+ List.iter
+ (fun n -> n # write os enc)
+ (self # sub_nodes);
+
+ begin match ntype with
+ T_element name ->
+ (* The end tag must begin with "</"; without this prefix only the
+ * bare element name was written, and the output was not
+ * well-formed.
+ *)
+ wms ("</" ^ name ^ "\n>");
+ | _ ->
+ ()
+ end
+
+ (* TODO: How to write comments? The comment string may contain
+ * illegal characters or "--".
+ *)
+
+
+ (* Shorthand for the method write with the output encoding
+ * `Enc_iso88591
+ *)
+ method write_compact_as_latin1 os =
+ self # write os `Enc_iso88591
+
+ end
+;;
+
+
+let spec_table_find_exemplar tab eltype =
+  (* The exemplar registered for the element type, or the default
+   * element exemplar of the table if there is no specific one
+   *)
+  let mapping = tab.mapping in
+  try Hashtbl.find mapping eltype
+  with Not_found -> tab.default_element
+;;
+
+
+let create_data_node spec dtd str =
+  (* Lets the data node exemplar of the specification create the node *)
+  match spec with
+    Spec_table tab ->
+      (tab.data_node) # create_data dtd str
+;;
+
+
+let create_element_node ?position spec dtd eltype atts =
+  (* Lets the exemplar that is responsible for "eltype" create the node *)
+  match spec with
+    Spec_table tab ->
+      (spec_table_find_exemplar tab eltype)
+        # create_element ?position:position dtd (T_element eltype) atts
+;;
+
+
+let create_super_root_node ?position spec dtd =
+  (* Fails if the specification has no exemplar for super root nodes *)
+  match spec with
+    Spec_table tab ->
+      let exemplar =
+        match tab.super_root_node with
+          Some x -> x
+        | None ->
+            failwith "Pxp_document.create_super_root_node: No exemplar"
+      in
+      exemplar # create_element ?position:position dtd T_super_root []
+;;
+
+let create_no_node ?position spec dtd =
+  (* A T_none node is created from the default element exemplar *)
+  match spec with
+    Spec_table tab ->
+      (tab.default_element)
+        # create_element ?position:position dtd T_none []
+;;
+
+
+let create_comment_node ?position spec dtd text =
+  (* Creates a comment node with the contents "text"; fails if the
+   * specification has no exemplar for comment nodes
+   *)
+  match spec with
+    Spec_table tab ->
+      let exemplar =
+        match tab.comment_node with
+          Some x -> x
+        | None ->
+            failwith "Pxp_document.create_comment_node: No exemplar"
+      in
+      let e =
+        exemplar # create_element ?position:position dtd T_comment [] in
+      e # set_comment (Some text);
+      e
+;;
+
+
+let create_pinstr_node ?position spec dtd pi =
+  (* Creates a node for the processing instruction "pi". The exemplar is
+   * the one registered for the target of "pi", or else the default
+   * exemplar for such nodes; it is a failure if there is neither.
+   *)
+  let target = pi # target in
+  let exemplar =
+    match spec with
+      Spec_table tab ->
+        let specific =
+          try Some (Hashtbl.find tab.pinstr_mapping target)
+          with Not_found -> None in
+        ( match specific, tab.default_pinstr_node with
+            Some x, _ -> x
+          | None, Some x -> x
+          | None, None ->
+              failwith
+                "Pxp_document.create_pinstr_node: No exemplar"
+        )
+  in
+  let el =
+    exemplar # create_element ?position:position dtd (T_pinstr target) [] in
+  el # add_pinstr pi;
+  el
+;;
+
+
+let find ?(deeply=false) f base =
+  (* Returns the first node below "base" for which "f" holds, or raises
+   * Not_found. Only the children of "base" are inspected unless
+   * ~deeply is true; then the subtree of a child is searched before
+   * the next child is tried.
+   *)
+  let rec search children =
+    match children with
+      [] -> raise Not_found
+    | n :: rest ->
+        if f n then
+          n
+        else if deeply then
+          (try search (n # sub_nodes)
+           with Not_found -> search rest)
+        else
+          search rest
+  in
+  search (base # sub_nodes)
+;;
+
+
+let find_all ?(deeply=false) f base =
+  (* Returns all nodes below "base" for which "f" holds. Only the
+   * children of "base" are inspected unless ~deeply is true. A node is
+   * listed before the nodes of its subtree and before its following
+   * siblings.
+   *)
+  let rec collect children =
+    match children with
+      [] -> []
+    | n :: rest ->
+        if deeply then begin
+          let others = collect (n # sub_nodes) @ collect rest in
+          if f n then n :: others else others
+        end
+        else begin
+          if f n then n :: collect rest else collect rest
+        end
+  in
+  collect (base # sub_nodes)
+;;
+
+
+let find_element ?deeply eltype base =
+  (* "find" restricted to element nodes with the name "eltype" *)
+  let is_wanted n =
+    match n # node_type with
+      T_element name -> name = eltype
+    | _ -> false
+  in
+  find ?deeply:deeply is_wanted base
+;;
+
+
+let find_all_elements ?deeply eltype base =
+  (* "find_all" restricted to element nodes with the name "eltype" *)
+  let is_wanted n =
+    match n # node_type with
+      T_element name -> name = eltype
+    | _ -> false
+  in
+  find_all ?deeply:deeply is_wanted base
+;;
+
+
+exception Skip;;
+
+(* Maps the tree "base" to a new tree: "pre" is applied to a node
+ * before its children are processed, "post" afterwards. If the result
+ * of "pre" is not a data node, the mapped children of the original
+ * node become the children of that result (set_nodes).
+ * A node for which "pre" or "post" raises Skip is left out together
+ * with its subtree; if this happens for "base" itself, Not_found is
+ * raised.
+ *)
+let map_tree ~pre ?(post=(fun x -> x)) base =
+ let rec map_rec n =
+ (try
+ let n' = pre n in
+ if n' # node_type <> T_data then begin
+ (* the children are taken from the original node "n" *)
+ let children = n # sub_nodes in
+ let children' = map_children children in
+ n' # set_nodes children';
+ end;
+ post n'
+ with
+ (* Skip is turned into Not_found, which map_children catches *)
+ Skip -> raise Not_found
+ )
+ and map_children l =
+ match l with
+ [] -> []
+ | child :: l' ->
+ (try
+ let child' = map_rec child in
+ child' :: map_children l'
+ with
+ Not_found ->
+ (* drop this child and go on with its siblings *)
+ map_children l'
+ )
+ in
+ map_rec base
+;;
+
+
+(* Like map_tree, but "pre" and "post" are called as "f left node right"
+ * where "left" and "right" are the neighbours of the node (None if
+ * there is no such neighbour; always None for "base").
+ * "pre" sees the original siblings. "post" is applied to the list of
+ * the mapped children of a node after the whole list has been built,
+ * and sees the mapped siblings.
+ * A node for which "pre" or "post" raises Skip is left out; if this
+ * happens for "base" itself, Not_found is raised.
+ *)
+let map_tree_sibl ~pre ?(post=(fun _ x _ -> x)) base =
+ let rec map_rec l n r =
+ (try
+ let n' = pre l n r in
+ if n' # node_type <> T_data then begin
+ (* the children are taken from the original node "n" *)
+ let children = n # sub_nodes in
+ let children' = map_children None children in
+ let children'' = postprocess_children None children' in
+ n' # set_nodes children'';
+ end;
+ (* "post" is not applied here but by the caller *)
+ n'
+ with
+ Skip -> raise Not_found
+ )
+ and map_children predecessor l =
+ (match l with
+ [] -> []
+ | child :: l' ->
+ let successor =
+ match l' with
+ [] -> None
+ | x :: _ -> Some x in
+ (try
+ let child' = map_rec predecessor child successor in
+ child' :: map_children (Some child) l'
+ with
+ Not_found ->
+ (* drop this child; it is still passed as predecessor *)
+ map_children (Some child) l'
+ )
+ )
+ and postprocess_children predecessor l =
+ (match l with
+ [] -> []
+ | child :: l' ->
+ let successor =
+ match l' with
+ [] -> None
+ | x :: _ -> Some x in
+ (try
+ let child' = post predecessor child successor in
+ (* the predecessor passed on is "child", not the result of "post" *)
+ child' :: postprocess_children (Some child) l'
+ with
+ Skip ->
+ postprocess_children (Some child) l'
+ )
+ )
+ in
+ let base' = map_rec None base None in
+ try post None base' None with Skip -> raise Not_found
+;;
+
+
+(* Walks over the tree "base": for every node "pre" is called before
+ * and "post" after the children have been visited.
+ * If "pre" or "post" raises Skip for a node, the rest of the visit of
+ * this node is abandoned and the walk goes on with the next sibling;
+ * if this happens for "base" itself, Not_found is raised.
+ *)
+let iter_tree ?(pre=(fun x -> ())) ?(post=(fun x -> ())) base =
+  let rec iter_rec n =
+    (try
+       pre n;
+       let children = n # sub_nodes in
+       iter_children children;
+       post n
+     with
+       (* Skip is turned into Not_found, which iter_children catches *)
+       Skip -> raise Not_found
+    )
+  and iter_children l =
+    match l with
+      (* The result is unit, not a list: iter_children is only called
+       * for its effect, and its value is discarded in a sequence.
+       *)
+      [] -> ()
+    | child :: l' ->
+        (try
+           iter_rec child;
+           iter_children l'
+         with
+           Not_found ->
+             iter_children l'
+        )
+  in
+  iter_rec base
+;;
+
+
+(* Like iter_tree, but "pre" and "post" are called as "f left node
+ * right" where "left" and "right" are the neighbours of the node among
+ * the children of its parent (None if there is no such neighbour;
+ * always None for "base").
+ *)
+let iter_tree_sibl ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) base =
+  let rec iter_rec l n r =
+    (try
+       pre l n r;
+       let children = n # sub_nodes in
+       iter_children None children;
+       post l n r
+     with
+       (* Skip is turned into Not_found, which iter_children catches *)
+       Skip -> raise Not_found
+    )
+  and iter_children predecessor l =
+    (match l with
+       (* The result is unit, not a list: iter_children is only called
+        * for its effect, and its value is discarded in a sequence.
+        *)
+       [] -> ()
+     | child :: l' ->
+         let successor =
+           match l' with
+             [] -> None
+           | x :: _ -> Some x in
+         (try
+            iter_rec predecessor child successor;
+            iter_children (Some child) l'
+          with
+            Not_found ->
+              iter_children (Some child) l'
+         )
+    )
+  in
+  iter_rec None base None
+;;
+
+
+let compare a b =
+  (* Compares two nodes by their node paths: the paths are compared
+   * component by component, and a path that is a proper prefix of the
+   * other one is the smaller one. The result is negative, zero, or
+   * positive.
+   *)
+  let rec cmp = function
+      [], [] -> 0
+    | [], _ :: _ -> -1
+    | _ :: _, [] -> 1
+    | x :: xs, y :: ys ->
+        if x = y then cmp (xs, ys) else x - y
+  in
+  let a_path = a # node_path in
+  let b_path = b # node_path in
+  cmp (a_path, b_path)
+;;
+
+
+type 'ext ord_index = ('ext node, int) Hashtbl.t;;
+
+let create_ord_index base =
+  (* First pass: count the nodes to get the initial size of the table.
+   * Second pass: number the nodes from 0 in the order in which
+   * iter_tree calls ~pre for them.
+   *)
+  let count = ref 0 in
+  iter_tree ~pre:(fun _ -> incr count) base;
+  let idx = Hashtbl.create !count in
+  let next = ref 0 in
+  let register node =
+    ( Hashtbl.add idx node !next;
+      incr next
+    ) in
+  iter_tree ~pre:register base;
+  idx
+;;
+
+
+(* Returns the number stored for "node" in the index "idx";
+ * Hashtbl.find raises Not_found if the node is not in the index.
+ *)
+let ord_number idx node =
+ Hashtbl.find idx node
+;;
+
+let ord_compare idx a b =
+  (* The difference of the numbers that the index stores for "a" and
+   * "b"; Not_found if one of them is not in the index
+   *)
+  let number_a = Hashtbl.find idx a in
+  let number_b = Hashtbl.find idx b in
+  number_a - number_b
+;;
+
+(* A document consists of a root node, the DTD of this node, the XML
+ * version string, and a table of processing instructions. Warnings are
+ * reported to the object "the_warner".
+ *)
+class ['ext] document the_warner =
+ object (self)
+ val mutable xml_version = "1.0"
+ val mutable dtd = (None : dtd option)
+ val mutable root = (None : 'ext node option)
+
+ val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
+ val warner = (the_warner : collect_warnings)
+
+ (* Stores the version string; versions other than "1.0" are accepted
+ * with a warning.
+ *)
+ method init_xml_version s =
+ if s <> "1.0" then
+ warner # warn ("XML version '" ^ s ^ "' not supported");
+ xml_version <- s
+
+ (* Sets the root node "r" and adopts its DTD as the DTD of the
+ * document. Unless the DTD allows arbitrary elements, the name of the
+ * root element must be the root name declared in the DTD (if any);
+ * otherwise Validation_error is raised. "r" must be an element or a
+ * super root node.
+ *)
+ method init_root r =
+ let dtd_r = r # dtd in
+ match r # node_type with
+
+ (**************** CASE: We have a super root element ***************)
+
+ | T_super_root ->
+ if not (dtd_r # arbitrary_allowed) then begin
+ match dtd_r # root with
+ Some declared_root_element_name ->
+ (* the real root is the first element among the children *)
+ let real_root_element =
+ try
+ List.find
+ (fun r' ->
+ match r' # node_type with
+ | T_element _ -> true
+ | _ -> false)
+ (r # sub_nodes)
+ with
+ Not_found ->
+ failwith "Pxp_document.document#init_root: Super root does not contain root element"
+ (* TODO: Check also that there is at most one
+ * element in the super root node
+ *)
+
+ in
+ let real_root_element_name =
+ match real_root_element # node_type with
+ T_element name -> name
+ | _ -> assert false
+ in
+ if real_root_element_name <> declared_root_element_name then
+ raise
+ (Validation_error ("The root element is `" ^
+ real_root_element_name ^
+ "' but is declared as `" ^
+ declared_root_element_name ^ "'"))
+ | None -> ()
+ end;
+ (* All is okay, so store dtd and root node: *)
+ dtd <- Some dtd_r;
+ root <- Some r
+
+ (**************** CASE: No super root element **********************)
+
+ | T_element root_element_name ->
+ if not (dtd_r # arbitrary_allowed) then begin
+ match dtd_r # root with
+ Some declared_root_element_name ->
+ if root_element_name <> declared_root_element_name then
+ raise
+ (Validation_error ("The root element is `" ^
+ root_element_name ^
+ "' but is declared as `" ^
+ declared_root_element_name ^ "'"))
+ | None ->
+ (* This may happen if you initialize your DTD yourself.
+ * The value 'None' means that the method 'set_root' was
+ * never called for the DTD; we interpret it here as:
+ * The root element does not matter.
+ *)
+ ()
+ end;
+ (* All is okay, so store dtd and root node: *)
+ dtd <- Some dtd_r;
+ root <- Some r
+
+ | _ ->
+ failwith "Pxp_document.document#init_root: the root node must be an element or super-root"
+
+ method xml_version = xml_version
+
+ (* false if no DTD has been set yet *)
+ method xml_standalone =
+ match dtd with
+ None -> false
+ | Some d -> d # standalone_declaration
+
+ method dtd =
+ match dtd with
+ None -> failwith "Pxp_document.document#dtd: Document has no DTD"
+ | Some d -> d
+
+ method encoding =
+ match dtd with
+ None -> failwith "Pxp_document.document#encoding: Document has no DTD"
+ | Some d -> d # encoding
+
+ method root =
+ match root with
+ None -> failwith "Pxp_document.document#root: Document has no root element"
+ | Some r -> r
+
+ (* Stores the processing instruction "pi" under its target name. If a
+ * DTD is set, the encoding of "pi" must be the encoding of the DTD.
+ *)
+ method add_pinstr pi =
+ begin match dtd with
+ None -> ()
+ | Some d ->
+ if pi # encoding <> d # encoding then
+ failwith "Pxp_document.document # add_pinstr: Inconsistent encodings";
+ end;
+ let name = pi # target in
+ Hashtbl.add (Lazy.force pinstr) name pi
+
+ method pinstr name =
+ Hashtbl.find_all (Lazy.force pinstr) name
+
+ method pinstr_names =
+ let l = ref [] in
+ Hashtbl.iter
+ (fun n _ -> l := n :: !l)
+ (Lazy.force pinstr);
+ !l
+
+ (* Writes the document to "os" in the encoding "enc": first the DTD,
+ * then the processing instructions of the document, then the root
+ * node.
+ *)
+ method write os enc =
+ let encoding = self # encoding in
+ let wms =
+ write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+ let r = self # root in
+ (* NOTE(review): only a newline is written here; presumably the XML
+ * declaration was meant to be written at this place -- confirm
+ * against the upstream source.
+ *)
+ wms ("\n");
+ ( match self # dtd # root with
+ None ->
+ self # dtd # write os enc false
+ | Some _ ->
+ self # dtd # write os enc true
+ );
+ Hashtbl.iter
+ (fun n pi ->
+ pi # write os enc
+ )
+ (Lazy.force pinstr);
+ r # write os enc;
+ wms "\n";
+
+ method write_compact_as_latin1 os =
+ self # write os `Enc_iso88591
+
+ end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.14 2000/08/30 15:47:52 gerd
+ * Implementation of pxp_document.mli rev 1.10.
+ *
+ * Revision 1.13 2000/08/26 23:29:10 gerd
+ * Implementations for the changes in rev 1.9 of pxp_document.mli.
+ *
+ * Revision 1.12 2000/08/18 20:14:00 gerd
+ * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
+ * (T_none), (T_namespace).
+ *
+ * Revision 1.11 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.10 2000/07/23 02:16:34 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.9 2000/07/16 19:37:09 gerd
+ * Simplification.
+ *
+ * Revision 1.8 2000/07/16 17:50:01 gerd
+ * Fixes in 'write'
+ *
+ * Revision 1.7 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.6 2000/07/14 13:56:11 gerd
+ * Added methods id_attribute_name, id_attribute_value,
+ * idref_attribute_names.
+ *
+ * Revision 1.5 2000/07/09 17:51:14 gerd
+ * Element nodes can store positions.
+ *
+ * Revision 1.4 2000/07/08 23:04:06 gerd
+ * [Merging 0.2.10:] Bugfix: allow_undeclared_attribute
+ *
+ * Revision 1.3 2000/07/04 22:10:06 gerd
+ * Implemented rev 1.3 of pxp_document.mli in a straight-
+ * forward fashion.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_document.ml:
+ *
+ * Revision 1.19 2000/05/27 19:14:42 gerd
+ * value_of_attribute: this function has been moved to
+ * markup_aux.ml.
+ *
+ * Added the following checks whether there is a violation
+ * against the standalone declaration:
+ * - Externally declared elements with regexp content model
+ * must not contain extra white space
+ * - The effect of normalization of externally declared attributes
+ * must not depend on the type of the attributes
+ * - Declared default values of externally declared attributes
+ * must not have an effect on the value of the attributes.
+ *
+ * Removed the method init_xml_standalone. It is now stored in
+ * the DTD whether there is a standalone declaration.
+ *
+ * Revision 1.18 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.17 2000/05/06 23:12:20 gerd
+ * Allow undeclared attributes.
+ *
+ * Revision 1.16 2000/05/01 20:42:28 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.15 2000/04/30 18:15:22 gerd
+ * In function validate_content: Special handling of the pseudo
+ * nodes "-pi" and "-vr".
+ * Method init_root, class document: Recognizes whether the
+ * root is virtual or real. The check on the root element name is different
+ * in each case.
+ * New method keep_always_whitespace_mode: Turns a special mode
+ * on in which ignorable whitespace is included into the document.
+ *
+ * Revision 1.14 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.13 2000/01/27 21:51:56 gerd
+ * Added method 'attributes'.
+ *
+ * Revision 1.12 2000/01/27 21:19:34 gerd
+ * Added methods.
+ * Bugfix: 'orphaned_clone' performs now really a clone.
+ *
+ * Revision 1.11 2000/01/20 21:57:58 gerd
+ * Bugfix: method set_nodes does no longer add the new subnodes
+ * in the reverse order.
+ *
+ * Revision 1.10 1999/12/17 21:35:37 gerd
+ * Bugfix: If the name of the root element is not specified in
+ * the DTD, the document does not check whether the root element is a
+ * specific element.
+ *
+ * Revision 1.9 1999/11/09 22:22:01 gerd
+ * The "document" class now checks that the root element is the
+ * same as the declared root element. Thanks to Claudio Sacerdoti Coen
+ * for his bug report.
+ *
+ * Revision 1.8 1999/09/01 22:51:40 gerd
+ * Added methods to store processing instructions.
+ *
+ * Revision 1.7 1999/09/01 16:19:18 gerd
+ * Added some warnings.
+ * If an element type has the content model EMPTY, it is now strictly
+ * checked that the element instance is really empty. Especially, white space
+ * is NOT allowed in such instances.
+ *
+ * Revision 1.6 1999/08/19 21:58:59 gerd
+ * Added method "reset_finder". This is not very convincing, but
+ * currently the simplest way to update the ID hash table.
+ *
+ * Revision 1.5 1999/08/19 01:08:15 gerd
+ * Added method "find" that searches node by ID in the whole
+ * tree.
+ * Bugfix: After the extension has been cloned, the "set_node" method
+ * is invoked telling the clone to which node it is associated.
+ *
+ * Revision 1.4 1999/08/15 13:52:52 gerd
+ * Bugfix: WF_error "Attribute x occurs twice in element [unnamed]"
+ * no longer possible; instead of "[unnamed]" the actual name is printed.
+ * Improved some of the error messages.
+ *
+ * Revision 1.3 1999/08/15 02:19:01 gerd
+ * If the DTD allows arbitrary elements, unknown elements are not
+ * rejected.
+ *
+ * Revision 1.2 1999/08/11 14:54:23 gerd
+ * Optimizations: The hashtable for the 'pinstr' variable is only
+ * created on demand. -- The 'only_whitespace' function uses a simple "for"
+ * loop if the string is small and a lexer if the string is big.
+ *
+ * Revision 1.1 1999/08/10 00:35:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_document.mli b/helm/DEVEL/pxp/pxp/pxp_document.mli
new file mode 100644
index 000000000..67c6e4d62
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_document.mli
@@ -0,0 +1,838 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(**********************************************************************)
+(* *)
+(* Pxp_document: *)
+(* Object model of the document/element instances *)
+(* *)
+(**********************************************************************)
+
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class type node ............. The common class type of the nodes of
+ * the element tree. Nodes are either
+ * elements (inner nodes) or data nodes
+ * (leaves)
+ * class type extension ........ The minimal properties of the so-called
+ * extensions of the nodes: Nodes can be
+ * customized by applying a class parameter
+ * that adds methods/values to nodes.
+ * class data_impl : node ...... Implements data nodes.
+ * class element_impl : node ... Implements element nodes
+ * class document .............. A document is an element with some additional
+ * properties
+ *
+ * ======================================================================
+ *
+ * THE STRUCTURE OF NODE TREES:
+ *
+ * Every node except the root node has a parent node. The parent node is
+ * always an element, because data nodes never contain other nodes.
+ * In the other direction, element nodes may have children; both elements
+ * and data nodes are possible as children.
+ * Every node knows its parent (if any) and all its children (if any);
+ * the linkage is maintained in both directions. A node without a parent
+ * is called a root.
+ * It is not possible that a node is the child of two nodes (two different nodes
+ * or a multiple child of the same node).
+ * You can break the connection between a node and its parent; the method
+ * "delete" performs this operation and deletes the node from the parent's
+ * list of children. The node is now a root, for itself and for all
+ * subordinate nodes. In this context, the node is also called an orphan,
+ * because it has lost its parent (this is a bit misleading because the
+ * parent is not always the creator of a node).
+ * In order to simplify complex operations, you can also set the list of
+ * children of an element. Nodes that have been children before are unchanged;
+ * new nodes are added (and the linkage is set up), nodes no longer occurring
+ * in the list are handled as if they had been deleted.
+ * If you try to add a node that is not a root (either by an "add" or by a
+ * "set" operation) the operation fails.
+ *
+ * CREATION OF NODES
+ *
+ * The class interface supports creation of nodes by cloning a so-called
+ * exemplar. The idea is that it is sometimes useful to implement different
+ * element types by different classes, and to implement this by looking up
+ * exemplars.
+ * Imagine you have three element types A, B, and C, and three classes
+ * a, b, and c implementing the node interface (for example, by providing
+ * different extensions, see below). The XML parser can be configured to
+ * have a lookup table
+ * { A --> a0, B --> b0, C --> c0 }
+ * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
+ * objects belonging to these classes. If the parser finds an instance of
+ * A, it looks up the exemplar a0 of A and clones it (actually, the method
+ * "create_element" performs this for elements, and "create_data" for data
+ * nodes). Clones belong to the same class as the original nodes, so the
+ * instances of the elements have the same classes as the configured
+ * exemplars.
+ * Note: This technique assumes that the interface of all exemplars is the
+ * same!
+ *
+ * THE EXTENSION
+ *
+ * The class type node and all its implementations have a class parameter
+ * 'ext which must at least fulfil the properties of the class type "extension".
+ * The idea is that you can add properties, for example:
+ *
+ * class my_extension =
+ * object
+ * (* minimal properties required by class type "extension": *)
+ * method clone = ...
+ * method node = ...
+ * method set_node n = ...
+ * (* here my own methods: *)
+ * method do_this_and_that ...
+ * end
+ *
+ * class my_element_impl = [ my_extension ] element_impl
+ * class my_data_impl = [ my_extension ] data_impl
+ *
+ * The whole XML parser is parameterized with 'ext, so your extension is
+ * visible everywhere (this is the reason why extensibility is solved by
+ * parametric polymorphism and not by inclusive polymorphism (subtyping)).
+ *
+ *
+ * SOME COMPLICATED TYPE EXPRESSIONS
+ *
+ * Sometimes the following type expressions turn out to be necessary:
+ *
+ * 'a node extension as 'a
+ * This is the type of an extension that belongs to a node that
+ * has an extension that is the same as we started with.
+ *
+ * 'a extension node as 'a
+ * This is the type of a node that has an extension that belongs to a
+ * node of the type we started with.
+ *
+ *
+ * DOCUMENTS
+ * ...
+ *
+ * ======================================================================
+ *
+ * SIMPLE USAGE: ...
+ *)
+
+
+open Pxp_dtd
+
+
+type node_type =
+ (* The basic and most important node types:
+ * - T_element element_type is the type of element nodes
+ * - T_data is the type of text data nodes
+ * By design of the parser, neither CDATA sections nor entity references
+ * are represented in the node tree; so there are no types for them.
+ *)
+ T_element of string
+ | T_data
+
+ (* The following types are extensions to my original design. They have mainly
+ * been added to simplify the implementation of standards (such as
+ * XPath) that require that nodes of these types are included into the
+ * main document tree.
+ * There are options (see Pxp_yacc) forcing the parser to insert such
+ * nodes; in this case, the nodes are actually element nodes serving
+ * as wrappers for the additional data structures. The options are:
+ * enable_super_root_node, enable_pinstr_nodes, enable_comment_nodes.
+ * By default, such nodes are not created.
+ *)
+ | T_super_root
+ | T_pinstr of string (* The string is the target of the PI *)
+ | T_comment
+
+ (* The following types are fully virtual. This means that it is impossible
+ * to make the parser insert such nodes. However, these types might be
+ * practical when defining views on the tree.
+ * Note that the list of virtual node types will be extended if necessary.
+ *)
+ | T_none
+ | T_attribute of string (* The string is the name of the attribute *)
+ | T_namespace of string (* The string is the namespace prefix *)
+;;
+
+
+class type [ 'node ] extension =
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what previously has been set by "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+;;
+
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+
+ method extension : 'ext
+ (* Return the extension of this node: *)
+
+ method delete : unit
+ (* Delete this node from the parent's list of sub nodes. This node gets
+ * orphaned.
+ * 'delete' does nothing if this node does not have a parent.
+ *)
+
+ method parent : 'ext node
+ (* Get the parent, or raise Not_found if this node is an orphan. *)
+
+ method root : 'ext node
+ (* Get the direct or indirect parent that does not have a parent itself,
+ * i.e. the root of the tree.
+ *)
+
+ method orphaned_clone : 'self
+ (* return an exact clone of this element and all sub nodes (deep copy)
+ * except string values which are shared by this node and the clone.
+ * The other exception is that the clone has no parent (i.e. it is now
+ * a root).
+ *)
+
+ method orphaned_flat_clone : 'self
+ (* return a clone of this element where all subnodes are omitted.
+ * The type of the node, and the attributes are the same as in the
+ * original node.
+ * The clone has no parent.
+ *)
+
+ method add_node : ?force:bool -> 'ext node -> unit
+ (* Append new sub nodes -- mainly used by the parser itself, but
+ * of course open for everybody. If an element is added, it must be
+ * an orphan (i.e. does not have a parent node); and after addition
+ * *this* node is the new parent.
+ * The method performs some basic validation checks if the current node
+ * has a regular expression as content model, or is EMPTY. You can
+ * turn these checks off by passing ~force:true to the method.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* Add a processing instruction to the set of processing instructions of
+ * this node. Usually only elements contain processing instructions.
+ *)
+
+ method pinstr : string -> proc_instruction list
+ (* Get all processing instructions with the passed name *)
+
+ method pinstr_names : string list
+ (* Get a list of all names of processing instructions *)
+
+ method node_position : int
+ (* Returns the position of this node among all children of the parent
+ * node. Positions are counted from 0.
+ * Raises Not_found if the node is the root node.
+ *)
+
+ method node_path : int list
+ (* Returns the list of node positions of the ancestors of this node,
+ * including this node. The first list element is the node position
+ * of this child of the root, and the last list element is the
+ * node position of this node.
+ * Returns [] if the node is the root node.
+ *)
+
+ method sub_nodes : 'ext node list
+ (* Get the list of sub nodes *)
+
+ method iter_nodes : ('ext node -> unit) -> unit
+ (* iterate over the sub nodes *)
+
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ (* Here every iteration step can also access the previous and the
+ * following node, if present.
+ *)
+
+ method nth_node : int -> 'ext node
+ (* Returns the n-th sub node of this node, n >= 0. Raises Not_found
+ * if the index is out of the valid range.
+ * Note that the first invocation of this method requires additional
+ * overhead.
+ *)
+
+ method previous_node : 'ext node
+ method next_node : 'ext node
+ (* Return the previous and next nodes, respectively. These methods are
+ * equivalent to
+ * - parent # nth_node (self # node_position - 1) and
+ * - parent # nth_node (self # node_position + 1), respectively.
+ *)
+
+ method set_nodes : 'ext node list -> unit
+ (* Set the list of sub nodes. Elements that are no longer sub nodes get
+ * orphaned, and all new elements that previously were not sub nodes
+ * must have been orphaned.
+ *)
+
+ method data : string
+ (* Get the data string of this node. For data nodes, this string is just
+ * the content. For elements, this string is the concatenation of all
+ * subordinate data nodes.
+ *)
+
+ method node_type : node_type
+ (* Get the type of this node (one of the node_type variants). *)
+
+ method position : (string * int * int)
+ (* Return the name of the entity, the line number, and the column
+ * position (byte offset) of the beginning of the element.
+ * Only available if the element has been created with position
+ * information.
+ * Returns "?",0,0 if not available. (Note: Line number 0 is not
+ * possible otherwise.)
+ *)
+
+ method attribute : string -> Pxp_types.att_value
+ method attribute_names : string list
+ method attribute_type : string -> Pxp_types.att_type
+ method attributes : (string * Pxp_types.att_value) list
+ (* Get a specific attribute; get the names of all attributes; get the
+ * type of a specific attribute; get names and values of all attributes.
+ * Only elements have attributes.
+ * Note: If the DTD allows arbitrary attributes for this element,
+ * "attribute_type" raises Undeclared.
+ *)
+
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ (* Return the attribute or fail if the attribute is not present:
+ * The first version passes the value always as string back;
+ * the second version always as list.
+ *)
+
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ (* Return the attribute value if the attribute is present. The first
+ * version returns it as string option (None if the attribute is missing);
+ * the second version returns it as list and, lacking an option type,
+ * presumably yields [] for a missing attribute -- TODO confirm.
+ *)
+
+ method id_attribute_name : string
+ method id_attribute_value : string
+ (* Return the name and value of the ID attribute. The methods may
+ * raise Not_found if there is no ID attribute in the DTD, or no
+ * ID attribute in the element, respectively.
+ *)
+
+ method idref_attribute_names : string list
+ (* Returns the list of attribute names of IDREF or IDREFS type. *)
+
+ method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ (* Sets the attributes but does not check whether they match the DTD.
+ *)
+
+ method attributes_as_nodes : 'ext node list
+ (* Experimental feature: Return the attributes as node list. Every node
+ * has type T_attribute n, and contains only the single attribute n.
+ * This node list is computed on demand, so the first invocation of this
+ * method will create the list, and following invocations will only
+ * return the existing list.
+ *)
+
+ method set_comment : string option -> unit
+ (* Sets the comment string; only applicable for T_comment nodes *)
+
+ method comment : string option
+ (* Get the comment string.
+ * Returns always None for nodes with a type other than T_comment.
+ *)
+
+ method dtd : dtd
+ (* Get the DTD. Fails if no DTD is specified (which is impossible if
+ * 'create_element' or 'create_data' have been used to create this
+ * object)
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Get the encoding which is always the same as the encoding of the
+ * DTD. See also method 'dtd' (Note: This method fails, too, if
+ * no DTD is present.)
+ *)
+
+ method create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list -> 'ext node
+ (* create an "empty copy" of this element:
+ * - new DTD
+ * - new node type (which must not be T_data)
+ * - new attribute list
+ * - empty list of nodes
+ *)
+
+ method create_data : dtd -> string -> 'ext node
+ (* create an "empty copy" of this data node: *)
+
+ method local_validate :
+ ?use_dfa:bool ->
+ unit -> unit
+ (* Check that this element conforms to the DTD.
+ * Option ~use_dfa: If true, the deterministic finite automaton of
+ * regexp content models is used for validation, if available.
+ * Defaults to false.
+ *)
+
+ method keep_always_whitespace_mode : unit
+ (* Normally, add_node does not accept data nodes when the DTD does not
+ * allow data nodes or only whitespace ("ignorable whitespace").
+ * Once you have invoked this method, ignorable whitespace is forced
+ * to be included into the document.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* Write the contents of this node and the subtrees to the passed
+ * output stream; the passed encoding is used. The format
+ * is compact (the opposite of "pretty printing").
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+
+ (* ---------------------------------------- *)
+ (* The methods 'find' and 'reset_finder' are no longer supported.
+ * The functionality is provided by the configurable index object
+ * (see Pxp_yacc).
+ *)
+
+
+ (* ---------------------------------------- *)
+ (* internal methods: *)
+ method internal_adopt : 'ext node option -> int -> unit
+ method internal_set_pos : int -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : (string * int * int) ->
+ dtd -> string -> (string * string) list -> unit
+ method internal_init_other : (string * int * int) ->
+ dtd -> node_type -> unit
+ end
+;;
+
+
+class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
+ (* Creation:
+ * new data_impl an_extension
+ * creates a new data node with the given extension and the empty string
+ * as content.
+ *)
+;;
+
+
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+ (* Creation:
+ * new element_impl an_extension
+ * creates a new empty element node with the given extension.
+ *)
+;;
+
+
+(* Attribute and namespace nodes are experimental: *)
+
+class [ 'ext ] attribute_impl :
+ element:string -> name:string -> Pxp_types.att_value -> dtd -> [ 'ext ] node
+
+ (* Creation:
+ * new attribute_impl element_name attribute_name attribute_value dtd
+ * Note that attribute nodes do intentionally not have extensions.
+ *)
+
+(* Once namespaces get implemented:
+class [ 'ext ] namespace_impl :
+ prefix:string -> name:string -> dtd -> [ 'ext ] node
+*)
+
+(********************************** spec *********************************)
+
+type 'ext spec
+constraint 'ext = 'ext node #extension
+ (* Contains the exemplars used for the creation of new nodes
+ *)
+
+
+val make_spec_from_mapping :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_mapping: (string, 'ext node) Hashtbl.t ->
+ unit ->
+ 'ext spec
+ (* Specifies:
+ * - For new data nodes, the ~data_exemplar must be used
+ * - For new element nodes: If the element type is mentioned in the
+ * ~element_mapping hash table, the exemplar found in this table is
+ * used. Otherwise, the ~default_element_exemplar is used.
+ * Optionally:
+ * - You may also specify exemplars for super root nodes, for comments
+ * and for processing instructions
+ *)
+
+val make_spec_from_alist :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_alist : (string * 'ext node) list ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_alist: (string * 'ext node) list ->
+ unit ->
+ 'ext spec
+ (* This is a convenience function: You can pass the mappings from
+ * elements and PIs to exemplar by associative lists.
+ *)
+
+val create_data_node :
+ 'ext spec -> dtd -> string -> 'ext node
+val create_element_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> string -> (string * string) list -> 'ext node
+val create_super_root_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> 'ext node
+val create_comment_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> string -> 'ext node
+val create_pinstr_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> proc_instruction -> 'ext node
+ (* These functions use the exemplars contained in a spec and create fresh
+ * node objects from them.
+ *)
+
+val create_no_node :
+ ?position:(string * int * int) -> 'ext spec -> dtd -> 'ext node
+ (* Creates a T_none node with limited functionality *)
+
+(*********************** Ordering of nodes ******************************)
+
+val compare : 'ext node -> 'ext node -> int
+ (* Returns -1 if the first node is before the second node, or +1 if the
+ * first node is after the second node, or 0 if both nodes are identical.
+ * If the nodes are unrelated (do not have a common ancestor), the result
+ * is undefined.
+ * This test is rather slow.
+ *)
+
+type 'ext ord_index
+constraint 'ext = 'ext node #extension
+ (* The type of ordinal indexes *)
+
+val create_ord_index : 'ext node -> 'ext ord_index
+ (* Creates an ordinal index for the subtree starting at the passed node.
+ * This index assigns to every node an ordinal number (beginning with 0) such
+ * that nodes are numbered upon the order of the first character in the XML
+ * representation (document order).
+ * Note that the index is not automatically updated when the tree is
+ * modified.
+ *)
+
+val ord_number : 'ext ord_index -> 'ext node -> int
+ (* Returns the ordinal number of the node, or raises Not_found *)
+
+val ord_compare : 'ext ord_index -> 'ext node -> 'ext node -> int
+ (* Compares two nodes like 'compare':
+ * Returns -1 if the first node is before the second node, or +1 if the
+ * first node is after the second node, or 0 if both nodes are identical.
+ * If one of the nodes does not occur in the ordinal index, Not_found
+ * is raised.
+ * This test is much faster than 'compare'.
+ *)
+
+
+(***************************** Iterators ********************************)
+
+val find : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node
+ (* Searches the first node for which the predicate f is true, and returns
+ * it. Raises Not_found if there is no such node.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+val find_all : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node list
+ (* Searches all nodes for which the predicate f is true, and returns them.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+val find_element : ?deeply:bool ->
+ string -> 'ext node -> 'ext node
+ (* Searches the first element with the passed element type.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+val find_all_elements : ?deeply:bool ->
+ string -> 'ext node -> 'ext node list
+ (* Searches all elements with the passed element type.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+exception Skip
+val map_tree : pre:('exta node -> 'extb node) ->
+ ?post:('extb node -> 'extb node) ->
+ 'exta node ->
+ 'extb node
+ (* Traverses the passed node and all children recursively. After entering
+ * a node, the function ~pre is called. The result of this function must
+ * be a new node; it must have neither children nor a parent (you can simply
+ * pass (fun n -> n # orphaned_flat_clone) as ~pre).
+ * After that, the children are processed in the same way (from left to
+ * right); the results of the transformation will be added to the
+ * new node as new children.
+ * Now, the ~post function is invoked with this node as argument, and
+ * the result is the result of the function (~post should return a root
+ * node, too; if not specified, the identity is the ~post function).
+ * Both ~pre and ~post may raise Skip, which causes the node to be
+ * left out. If the top node is skipped, the exception Not_found is
+ * raised.
+ *)
+
+val map_tree_sibl :
+ pre: ('exta node option -> 'exta node -> 'exta node option ->
+ 'extb node) ->
+ ?post:('extb node option -> 'extb node -> 'extb node option ->
+ 'extb node) ->
+ 'exta node ->
+ 'extb node
+ (* Works like map_tree, but the functions ~pre and ~post have additional
+ * arguments:
+ * - ~pre l n r: The node n is the node to map, and l is the previous
+ * node, and r is the next node (both None if not present). l and r
+ * are both nodes before the transformation.
+ * - ~post l n r: The node n is the node which is the result of ~pre
+ * plus adding children. l and r are again the previous and the next
+ * node, respectively, but after being transformed.
+ *)
+
+val iter_tree : ?pre:('ext node -> unit) ->
+ ?post:('ext node -> unit) ->
+ 'ext node ->
+ unit
+ (* Iterates only instead of mapping the nodes. *)
+
+val iter_tree_sibl :
+ ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ 'ext node ->
+ unit
+ (* Iterates only instead of mapping the nodes. *)
+
+
+(******************************* document ********************************)
+
+
+class [ 'ext ] document :
+ Pxp_types.collect_warnings ->
+ object
+ (* Documents: These are containers for root elements and for DTDs.
+ *
+ * Important invariant: A document is either empty (no root element,
+ * no DTD), or it has both a root element and a DTD.
+ *
+ * A fresh document created by 'new' is empty.
+ *)
+
+ method init_xml_version : string -> unit
+ (* Set the XML version string of the XML declaration. *)
+
+ method init_root : 'ext node -> unit
+ (* Set the root element. It is expected that the root element has
+ * a DTD.
+ * Note that 'init_root' checks whether the passed root element
+ * has the type expected by the DTD. The check takes into account
+ * that the root element might be a virtual root node.
+ *)
+
+ method xml_version : string
+ (* Returns the XML version from the XML declaration. Returns "1.0"
+ * if the declaration is missing.
+ *)
+
+ method xml_standalone : bool
+ (* Returns whether this document is declared as being standalone.
+ * This method returns the same value as 'standalone_declaration'
+ * of the DTD (if there is a DTD).
+ * Returns 'false' if there is no DTD.
+ *)
+
+ method dtd : dtd
+ (* Returns the DTD of the root element.
+ * Fails if there is no root element.
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Returns the string encoding of the document = the encoding of
+ * the root element = the encoding of the element tree = the
+ * encoding of the DTD.
+ * Fails if there is no root element.
+ *)
+
+ method root : 'ext node
+ (* Returns the root element, or fails if there is not any. *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* Adds a processing instruction to the document container.
+ * The parser does this for PIs occurring outside the DTD and outside
+ * the root element.
+ *)
+
+ method pinstr : string -> proc_instruction list
+ (* Return all PIs for a passed target string. *)
+
+ method pinstr_names : string list
+ (* Return all target strings of all PIs. *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* Write the document to the passed
+ * output stream; the passed encoding is used. The format
+ * is compact (the opposite of "pretty printing").
+ * If a DTD is present, the DTD is included into the internal subset.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.10 2000/08/30 15:47:37 gerd
+ * New method node_path.
+ * New function compare.
+ * New type ord_index with functions.
+ *
+ * Revision 1.9 2000/08/26 23:27:53 gerd
+ * New function: make_spec_from_alist.
+ * New iterators: find, find_all, find_element, find_all_elements,
+ * map_tree, map_tree_sibl, iter_tree, iter_tree_sibl.
+ * New node methods: node_position, nth_node, previous_node,
+ * next_node.
+ * Attribute and namespace types have now a string argument:
+ * the name/prefix. I hope this simplifies the handling of view nodes.
+ * First implementation of view nodes: attribute_impl. The
+ * method attributes_as_nodes returns the attributes wrapped into
+ * T_attribute nodes which reside outside the document tree.
+ *
+ * Revision 1.8 2000/08/18 20:14:00 gerd
+ * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
+ * (T_none), (T_namespace).
+ *
+ * Revision 1.7 2000/07/23 02:16:34 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.6 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.5 2000/07/14 13:56:11 gerd
+ * Added methods id_attribute_name, id_attribute_value,
+ * idref_attribute_names.
+ *
+ * Revision 1.4 2000/07/09 17:51:14 gerd
+ * Element nodes can store positions.
+ *
+ * Revision 1.3 2000/07/04 22:05:10 gerd
+ * New functions make_spec_from_mapping, create_data_node,
+ * create_element_node.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_document.mli:
+ *
+ * Revision 1.13 2000/05/27 19:15:08 gerd
+ * Removed the method init_xml_standalone.
+ *
+ * Revision 1.12 2000/05/01 20:42:34 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.11 2000/04/30 18:15:57 gerd
+ * Beautifications.
+ * New method keep_always_whitespace_mode.
+ *
+ * Revision 1.10 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.9 2000/01/27 21:51:56 gerd
+ * Added method 'attributes'.
+ *
+ * Revision 1.8 2000/01/27 21:19:07 gerd
+ * Added further methods.
+ *
+ * Revision 1.7 1999/11/09 22:20:14 gerd
+ * Removed method init_dtd from class "document". The DTD is
+ * implicitly passed to the document by the root element.
+ *
+ * Revision 1.6 1999/09/01 22:51:40 gerd
+ * Added methods to store processing instructions.
+ *
+ * Revision 1.5 1999/09/01 16:19:57 gerd
+ * The "document" class has now a "warner" as class argument.
+ *
+ * Revision 1.4 1999/08/19 21:59:13 gerd
+ * Added method "reset_finder".
+ *
+ * Revision 1.3 1999/08/19 01:08:29 gerd
+ * Added method "find".
+ *
+ * Revision 1.2 1999/08/15 02:19:41 gerd
+ * Some new explanations: That unknown elements are not rejected
+ * if the DTD allows them.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dtd.ml b/helm/DEVEL/pxp/pxp/pxp_dtd.ml
new file mode 100644
index 000000000..ee62c4ff0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_dtd.ml
@@ -0,0 +1,1090 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_lexers
+open Pxp_entity
+open Pxp_aux
+open Pxp_dfa
+
+(**********************************************************************)
+
+class dtd the_warner init_encoding =
+ object (self)
+ val mutable root = (None : string option)
+ val mutable id = (None : dtd_id option)
+
+ val warner = (the_warner : collect_warnings)
+ val encoding = init_encoding
+ val lexerset = Pxp_lexers.get_lexer_set init_encoding
+
+ val elements = (Hashtbl.create 100 : (string,dtd_element) Hashtbl.t)
+ val gen_entities = (Hashtbl.create 100 : (string,entity * bool) Hashtbl.t)
+ val par_entities = (Hashtbl.create 100 : (string,entity) Hashtbl.t)
+ val notations = (Hashtbl.create 100 : (string,dtd_notation) Hashtbl.t)
+ val pinstr = (Hashtbl.create 100 : (string,proc_instruction) Hashtbl.t)
+ val mutable element_names = []
+ val mutable gen_entity_names = []
+ val mutable par_entity_names = []
+ val mutable notation_names = []
+ val mutable pinstr_names = []
+
+ val mutable allow_arbitrary = false
+ val mutable standalone_declaration = false
+
+ val mutable validated = false
+
+    initializer
+      (* Pre-declares the five entities lt, gt, amp, apos and quot that
+       * XML 1.0 (section 4.6) requires every processor to know.  The
+       * values are character references, spelled as in the specification;
+       * "lt" and "amp" are escaped twice so that their replacement text is
+       * again a character reference ("&#60;" resp. "&#38;") and not a bare
+       * "<" or "&".  This matches the tokens (CRef 60, CRef 38, ...) that
+       * add_gen_entity accepts when a document redeclares these names.
+       * The entities get their own drop_warnings object as warner.
+       *)
+      let w = new drop_warnings in
+      self # add_gen_entity
+        (new internal_entity self "lt"   w "&#38;#60;" false false false encoding)
+        false;
+      self # add_gen_entity
+        (new internal_entity self "gt"   w "&#62;"     false false false encoding)
+        false;
+      self # add_gen_entity
+        (new internal_entity self "amp"  w "&#38;#38;" false false false encoding)
+        false;
+      self # add_gen_entity
+        (new internal_entity self "apos" w "&#39;"     false false false encoding)
+        false;
+      self # add_gen_entity
+        (new internal_entity self "quot" w "&#34;"     false false false encoding)
+        false;
+
+
+ (* returns the encoding passed to the constructor (init_encoding) *)
+ method encoding = encoding
+
+ (* returns the warner object passed to the constructor *)
+ method warner = warner
+
+    method set_root r =
+      (* Stores 'r' as the name of the root element.  The name can be set
+       * only once; a second invocation fails with an assertion.
+       *)
+      match root with
+          None   -> root <- Some r
+        | Some _ -> assert false
+
+
+    method set_id j =
+      (* Stores 'j' as the identifier of this DTD.  The identifier can be
+       * set only once; a second invocation fails with an assertion.
+       *)
+      match id with
+          None   -> id <- Some j
+        | Some _ -> assert false
+
+
+ (* returns the flag last stored by set_standalone_declaration
+ * (initially false)
+ *)
+ method standalone_declaration = standalone_declaration
+
+ method set_standalone_declaration b =
+ standalone_declaration <- b
+
+ (* Switches the DTD into the mode in which 'element' and 'notation'
+ * raise Undeclared instead of Validation_error for unknown names, and
+ * in which 'validate' does nothing.
+ *)
+ method allow_arbitrary =
+ allow_arbitrary <- true
+
+ (* Reverts the effect of allow_arbitrary. *)
+ method disallow_arbitrary =
+ allow_arbitrary <- false
+
+ (* returns whether the allow_arbitrary mode is currently active *)
+ method arbitrary_allowed = allow_arbitrary
+
+ (* return the root element name / the DTD identifier, or None if the
+ * value has not been set yet
+ *)
+ method root = root
+ method id = id
+
+
+    method add_element el =
+      (* Registers the element declaration 'el' under its name and marks
+       * the DTD as not validated.
+       * Raises Not_found if an element with the same name has already
+       * been added.
+       * Note: 'el' is encoded in the same way as 'self'!
+       *)
+      let el_name = el # name in
+      check_name warner el_name;
+      if Hashtbl.mem elements el_name then
+        raise Not_found
+      else begin
+        Hashtbl.add elements el_name el;
+        element_names <- el_name :: element_names;
+        validated <- false
+      end
+
+
+ method add_gen_entity en extdecl =
+ (* Adds the general entity 'en' to the DTD; the flag 'extdecl' is stored
+ * together with the entity.
+ * - Fails (Failure) if 'en' does not have the encoding of this DTD.
+ * - If an entity with the same name already exists, the existing
+ * declaration is kept and 'en' is not stored. For ordinary names a
+ * warning is emitted. The predefined entities 'lt', 'gt', 'amp',
+ * 'quot', and 'apos' may be redeclared silently, but only if the
+ * replacement text of 'en' is exactly the expected character, given
+ * as character data or as character reference; otherwise
+ * Validation_error is raised.
+ * NOTE(review): the previous comment at this place said that "the
+ * following" was commented out; the check below is active code.
+ *)
+ if en # encoding <> encoding then
+ failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
+ let name = en # name in
+ check_name warner name;
+ if Hashtbl.mem gen_entities name then begin
+ if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
+ (* These are allowed to be declared several times *)
+ let (rt,_) = en # replacement_text in
+ let toks = tokens_of_content_string lexerset rt in
+ (* Not_found is used locally to signal "value does not fit to the
+ * name"; it is translated into Validation_error below.
+ *)
+ try
+ begin match toks with
+ [CRef 60] -> if name <> "lt" then raise Not_found
+ | [CharData ">"] -> if name <> "gt" then raise Not_found
+ | [CRef 62] -> if name <> "gt" then raise Not_found
+ | [CRef 38] -> if name <> "amp" then raise Not_found
+ | [CharData "'"] -> if name <> "apos" then raise Not_found
+ | [CRef 39] -> if name <> "apos" then raise Not_found
+ | [CharData "\""] -> if name <> "quot" then raise Not_found
+ | [CRef 34] -> if name <> "quot" then raise Not_found
+ | _ -> raise Not_found
+ end
+ with
+ Not_found ->
+ raise (Validation_error("Predefined entity `" ^ name ^
+ "' redeclared"))
+ end
+ else
+ warner # warn ("Entity `" ^ name ^ "' declared twice")
+ end
+ else begin
+ Hashtbl.add gen_entities name (en, extdecl);
+ gen_entity_names <- name :: gen_entity_names
+ end
+
+
+    method add_par_entity en =
+      (* Adds the parameter entity 'en'.  If an entity with the same name
+       * already exists, the first declaration is kept and only a warning
+       * is emitted.  Fails (Failure) if the encoding of 'en' differs from
+       * the encoding of the DTD.
+       *)
+      if en # encoding <> encoding then
+        failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
+      let ent_name = en # name in
+      check_name warner ent_name;
+      if Hashtbl.mem par_entities ent_name then
+        warner # warn ("Entity `" ^ ent_name ^ "' declared twice")
+      else begin
+        Hashtbl.add par_entities ent_name en;
+        par_entity_names <- ent_name :: par_entity_names
+      end
+
+
+    method add_notation no =
+      (* Adds the notation 'no'.
+       * Raises Validation_error if a notation with the same name has
+       * already been added, and fails (Failure) if the encoding of 'no'
+       * differs from the encoding of the DTD.
+       *)
+      if no # encoding <> encoding then
+        failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
+      let no_name = no # name in
+      check_name warner no_name;
+      if Hashtbl.mem notations no_name then
+        raise (Validation_error("Notation `" ^ no_name ^ "' declared twice"))
+      else begin
+        Hashtbl.add notations no_name no;
+        notation_names <- no_name :: notation_names
+      end
+
+
+ method add_pinstr pi =
+ (* Adds the processing instruction 'pi' to the DTD. Several
+ * instructions may be stored for the same target.
+ * Targets beginning with "pxp:" are interpreted as options for PXP
+ * itself. Only the target "pxp:dtd" is known, with the options
+ * - optional-element-and-notation-declarations: invokes
+ * allow_arbitrary for the whole DTD
+ * - optional-attribute-declarations: invokes allow_arbitrary for
+ * every element listed in the attribute "elements"; the elements
+ * must already have been added to the DTD
+ * Any other "pxp:" target or option raises Error, and in this case the
+ * instruction is not stored.
+ * Fails (Failure) if the encoding of 'pi' differs from the encoding of
+ * the DTD.
+ *)
+ if pi # encoding <> encoding then
+ failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
+ let name = pi # target in
+ check_name warner name;
+
+ if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
+ match name with
+ "pxp:dtd" ->
+ let _, optname, atts = pi # parse_pxp_option in
+ begin match optname with
+ "optional-element-and-notation-declarations" ->
+ self # allow_arbitrary
+ | "optional-attribute-declarations" ->
+ let lexers = Pxp_lexers.get_lexer_set encoding in
+ let el_string =
+ try List.assoc "elements" atts
+ with Not_found ->
+ raise(Error("Missing `elements' attribute for pxp:dtd"))
+ in
+ (* the attribute value is a list of element names *)
+ let el = split_attribute_value lexers el_string in
+ List.iter
+ (fun e_name ->
+ let e =
+ try Hashtbl.find elements e_name
+ with
+ Not_found ->
+ raise(Error("Reference to unknown element `" ^
+ e_name ^ "'"))
+ in
+ e # allow_arbitrary
+ )
+ el
+ | _ ->
+ raise(Error("Unknown PXP option `" ^
+ optname ^ "'"))
+ end
+ | _ ->
+ raise(Error("The processing instruction target `" ^
+ name ^ "' is not defined by this PXP version"))
+ end
+ else begin
+ (*----------------------------------------------------------------------
+ * SUPPORT FOR DEPRECATED PI OPTIONS:
+ * - <?xml:allow_undeclared_elements_and_notations?>
+ * is now <?pxp:dtd optional-element-and-notation-declarations?>
+ * - <?xml:allow_undeclared_attributes <elementname>?>
+ * is now <?pxp:dtd optional-attribute-declarations
+ * elements='<elementname> ...'?>
+ * Please update your DTDs! Alternatively, you may uncomment the
+ * following piece of code.
+ *)
+(* if name = "xml:allow_undeclared_elements_and_notations" then *)
+(* self # allow_arbitrary; *)
+(* if name = "xml:allow_undeclared_attributes" then begin *)
+(* let v = pi # value in *)
+(* let e = *)
+(* try *)
+(* Hashtbl.find elements v *)
+(* with *)
+(* Not_found -> *)
+(* raise(Validation_error("Reference to undeclared element `"*)
+(* ^ v ^ "'")) *)
+(* in *)
+(* e # allow_arbitrary; *)
+(* end; *)
+ (*----------------------------------------------------------------------
+ *)
+ ()
+ end;
+ (* Hashtbl.add does not replace: earlier instructions with the same
+ * target remain accessible by Hashtbl.find_all
+ *)
+ Hashtbl.add pinstr name pi;
+ pinstr_names <- name :: pinstr_names;
+
+
+    method element name =
+      (* Returns the declaration of the element 'name'.  If there is no
+       * such declaration, Undeclared is raised when arbitrary elements are
+       * allowed, and Validation_error otherwise.
+       *)
+      try
+        Hashtbl.find elements name
+      with
+          Not_found ->
+            let failure =
+              if allow_arbitrary then
+                Undeclared
+              else
+                Validation_error("Reference to undeclared element `" ^ name ^ "'")
+            in
+            raise failure
+
+    method element_names =
+      (* the names of all element declarations that have been added *)
+      element_names
+
+
+    method gen_entity name =
+      (* Looks up the general entity 'name' and returns it together with
+       * the flag stored by add_gen_entity.  An unknown name is a
+       * well-formedness error (WF_error).
+       *)
+      try
+        Hashtbl.find gen_entities name
+      with
+          Not_found ->
+            let msg =
+              "Reference to undeclared general entity `" ^ name ^ "'" in
+            raise(WF_error msg)
+
+
+    method gen_entity_names =
+      (* the names of all general entities that have been added *)
+      gen_entity_names
+
+
+    method par_entity name =
+      (* Looks up the parameter entity 'name'.  An unknown name is a
+       * well-formedness error (WF_error).
+       *)
+      try
+        Hashtbl.find par_entities name
+      with
+          Not_found ->
+            let msg =
+              "Reference to undeclared parameter entity `" ^ name ^ "'" in
+            raise(WF_error msg)
+
+
+    method par_entity_names =
+      (* the names of all parameter entities that have been added *)
+      par_entity_names
+
+
+    method notation name =
+      (* Returns the declaration of the notation 'name'.  If there is no
+       * such declaration, Undeclared is raised when arbitrary notations
+       * are allowed, and Validation_error otherwise.
+       *)
+      try
+        Hashtbl.find notations name
+      with
+          Not_found ->
+            let failure =
+              if allow_arbitrary then
+                Undeclared
+              else
+                Validation_error("Reference to undeclared notation `" ^ name ^ "'")
+            in
+            raise failure
+
+
+    method notation_names =
+      (* the names of all notations that have been added *)
+      notation_names
+
+
+ method pinstr name =
+ (* returns the list of all processing instructions contained in the DTD
+ * with target 'name'; the list is empty if there is no such
+ * instruction
+ *)
+ Hashtbl.find_all pinstr name
+
+
+ (* returns the targets of the stored processing instructions; add_pinstr
+ * records the target on every call, so a target used by several
+ * instructions occurs several times
+ *)
+ method pinstr_names = pinstr_names
+
+    method write os enc doctype =
+      (* Writes the DTD to 'os', converting the text from the encoding of
+       * the DTD to the encoding 'enc'.
+       * If 'doctype' is true, the declarations are wrapped into
+       * "<!DOCTYPE root [ ... ]>"; this requires that the root element
+       * name has been set (otherwise Failure is raised).
+       * The following declarations are written, every group sorted by
+       * name: notations, unparsed (NDATA) general entities, elements, and
+       * processing instructions.  Other entities are not written.
+       *)
+      let wms =
+        write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      (* Writes a quoted literal; single quotes are used if the string
+       * itself contains a double quote.
+       *)
+      let write_sysid s =
+        if String.contains s '"' then
+          wms ("'" ^ s ^ "'")
+        else
+          wms ("\"" ^ s ^ "\"")
+      in
+
+      if doctype then begin
+        wms "<!DOCTYPE ";
+        ( match root with
+              None   -> failwith "#write: DTD without root"
+            | Some r -> wms r
+        );
+        wms " [\n";
+      end;
+
+      (* Notations: *)
+      List.iter
+        (fun name ->
+           let notation =
+             try Hashtbl.find notations name with Not_found -> assert false in
+           notation # write os enc)
+        (List.sort compare notation_names);
+
+      (* Unparsed entities: *)
+      List.iter
+        (fun name ->
+           let ent,_ =
+             try Hashtbl.find gen_entities name with Not_found -> assert false
+           in
+           if ent # is_ndata then begin
+             let xid = ent # ext_id in
+             let notation = ent # notation in
+             wms ("<!ENTITY " ^ name ^ " ");
+             ( match xid with
+                   System s ->
+                     wms "SYSTEM ";
+                     write_sysid s
+                 | Public (p,s) ->
+                     wms "PUBLIC ";
+                     write_sysid p;
+                     if (s <> "") then begin
+                       wms " ";
+                       write_sysid s;
+                     end
+                 | Anonymous ->
+                     failwith "#write: External ID Anonymous cannot be represented"
+             );
+             wms (" NDATA " ^ notation ^ ">\n");
+           end
+        )
+        (List.sort compare gen_entity_names);
+
+      (* Elements: *)
+      List.iter
+        (fun name ->
+           let element =
+             try Hashtbl.find elements name with Not_found -> assert false in
+           element # write os enc)
+        (List.sort compare element_names);
+
+      (* Processing instructions:
+       * Several instructions may share one target.  In this case the
+       * target occurs several times in pinstr_names, and Hashtbl.find
+       * would return only the instruction added last.  Every target is
+       * therefore visited once, and all its instructions are written in
+       * the order in which they were added (find_all returns the newest
+       * binding first).
+       *)
+      let rec write_pis previous names =
+        match names with
+            [] -> ()
+          | name :: names' ->
+              if previous <> Some name then
+                List.iter
+                  (fun pi -> pi # write os enc)
+                  (List.rev (Hashtbl.find_all pinstr name));
+              write_pis (Some name) names'
+      in
+      write_pis None (List.sort compare pinstr_names);
+
+      if doctype then
+        wms "]>\n";
+
+    method write_compact_as_latin1 os doctype =
+      (* same as 'write' with ISO-8859-1 as output encoding *)
+      self # write os `Enc_iso88591 doctype
+
+
+
+ (************************************************************)
+ (* VALIDATION *)
+ (************************************************************)
+
+    method only_deterministic_models =
+      (* Raises Validation_error for the first element found whose content
+       * model is a regular expression for which no DFA is available.
+       * Elements with other content models are not checked.
+       *)
+      let check_element el_name el =
+        match el # content_model with
+            Regexp _ ->
+              ( match el # content_dfa with
+                    None ->
+                      raise(Validation_error("The content model of element `" ^
+                                             el_name ^ "' is not deterministic"))
+                  | Some _ ->
+                      ()
+              )
+          | _ ->
+              ()
+      in
+      Hashtbl.iter check_element elements
+
+
+    method validate =
+      (* Checks the validity constraints of the DTD and raises
+       * Validation_error if one of them is violated.  Nothing is checked
+       * if the DTD has already been validated since the last change, or
+       * if arbitrary elements and notations are allowed.
+       *)
+      if validated || allow_arbitrary then
+        ()
+      else begin
+        (* Validity constraint: Notations in NDATA entity declarations must
+         * be declared
+         *)
+        List.iter
+          (fun name ->
+             let ent,_ =
+               try Hashtbl.find gen_entities name with Not_found -> assert false
+             in
+             if ent # is_ndata then begin
+               let notation = ent # notation in
+               try
+                 ignore(self # notation notation)
+                   (* Raises Validation_error if the constraint is violated *)
+               with
+                   Undeclared -> ()
+             end
+          )
+          gen_entity_names;
+
+        (* Validate the elements: *)
+        Hashtbl.iter
+          (fun _ el ->
+             el # validate)
+          elements;
+
+        (* Check the root element: *)
+        (* TODO: Check if this piece of code is executed at all! *)
+        begin match root with
+            None -> ()
+          | Some r ->
+              if not (Hashtbl.mem elements r) then
+                raise(Validation_error("The root element is not declared"))
+        end;
+        validated <- true;
+      end
+
+    method invalidate =
+      (* Forces that the next invocation of 'validate' checks the DTD
+       * again.
+       *)
+      validated <- false
+
+ (************************************************************)
+
+ end
+
+
+(**********************************************************************)
+
+and dtd_element the_dtd the_name =
+ object (self)
+ val dtd = (the_dtd : dtd)
+ val name = the_name
+ val lexerset = Pxp_lexers.get_lexer_set (the_dtd # encoding)
+ val mutable content_model = Unspecified
+ val mutable content_model_validated = false
+ val mutable content_dfa = lazy None
+
+ val mutable externally_declared = false
+
+ val mutable attributes =
+ ([] : (string * ((att_type * att_default) * bool)) list)
+ val mutable attributes_validated = false
+
+ val mutable id_att_name = None
+ val mutable idref_att_names = []
+
+ val mutable allow_arbitrary = false
+
+    method name = name
+
+    method set_cm_and_extdecl m extdecl =
+      (* Sets the content model 'm' and the flag 'extdecl'.  This is only
+       * possible as long as the content model is Unspecified; otherwise
+       * Validation_error is raised.  The DFA is not computed here but on
+       * the first access to content_dfa.
+       *)
+      match content_model with
+          Unspecified ->
+            content_model <- m;
+            content_model_validated <- false;
+            content_dfa <- lazy (self # compute_content_dfa);
+            externally_declared <- extdecl;
+            dtd # invalidate
+        | _ ->
+            raise(Validation_error("Element `" ^ name ^ "' has already a content model"))
+
+ (* returns the current content model (Unspecified until
+ * set_cm_and_extdecl has been invoked)
+ *)
+ method content_model = content_model
+
+ (* returns the DFA of the content model, if any; the lazy value is
+ * forced on access
+ *)
+ method content_dfa = Lazy.force content_dfa
+
+ (* Only content models of kind Regexp have a DFA. The result is None,
+ * too, if dfa_of_regexp_content_model raises Not_found; the method
+ * only_deterministic_models of class dtd reports this case as
+ * "not deterministic".
+ *)
+ method private compute_content_dfa =
+ match content_model with
+ Regexp re ->
+ ( try Some (dfa_of_regexp_content_model re)
+ with Not_found -> None
+ )
+ | _ ->
+ None
+
+ (* returns the flag passed to set_cm_and_extdecl *)
+ method externally_declared = externally_declared
+
+ (* the encoding is always the encoding of the DTD *)
+ method encoding = dtd # encoding
+
+ (* After allow_arbitrary the methods 'attribute' and
+ * 'attribute_violates_standalone_declaration' raise Undeclared instead
+ * of Validation_error for unknown attributes.
+ *)
+ method allow_arbitrary =
+ allow_arbitrary <- true
+
+ (* Reverts the effect of allow_arbitrary. *)
+ method disallow_arbitrary =
+ allow_arbitrary <- false
+
+ (* returns whether the allow_arbitrary mode is currently active *)
+ method arbitrary_allowed = allow_arbitrary
+
+    method add_attribute aname t d extdecl =
+      (* Adds the declaration of the attribute 'aname' with type 't' and
+       * default 'd'; 'extdecl' is stored together with the declaration.
+       * - The name check is skipped for "xml:lang" and "xml:space".
+       * - If the attribute is already declared, the first declaration is
+       *   kept and only a warning is emitted.
+       * - "xml:space" must be declared as enumeration of exactly "default"
+       *   and "preserve"; otherwise Validation_error is raised.
+       * - The names of ID and IDREF/IDREFS attributes are recorded for
+       *   id_attribute_name and idref_attribute_names.
+       * Adding an attribute invalidates the element and the DTD.
+       *)
+      if aname <> "xml:lang" && aname <> "xml:space" then
+        check_name (dtd#warner) aname;
+      if List.mem_assoc aname attributes then
+        dtd # warner # warn ("More than one declaration for attribute `" ^
+                             aname ^ "' of element type `" ^ name ^ "'")
+      else begin
+        begin match aname with
+            "xml:space" ->
+              begin match t with
+                  A_enum l ->
+                    (* the order of the enumerated values does not matter *)
+                    let l' = List.sort compare l in
+                    if l' <> [ "default"; "preserve" ] then
+                      raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
+                | _ ->
+                    raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
+              end
+          | _ -> ()
+        end;
+        begin match t with
+            A_id ->
+              id_att_name <- Some aname
+          | (A_idref | A_idrefs) ->
+              idref_att_names <- aname :: idref_att_names
+          | _ ->
+              ()
+        end;
+        attributes <- (aname, ((t,d),extdecl)) :: attributes;
+        attributes_validated <- false;
+        dtd # invalidate;
+      end
+
+    method attribute attname =
+      (* Returns the pair (type, default) of the attribute 'attname'.  If
+       * the attribute is not declared, Undeclared is raised when arbitrary
+       * attributes are allowed, and Validation_error otherwise.
+       *)
+      try
+        let (decl, _) = List.assoc attname attributes in
+        decl
+      with
+          Not_found ->
+            let failure =
+              if allow_arbitrary then
+                Undeclared
+              else
+                Validation_error("Attribute `" ^ attname ^ "' of element `"
+                                 ^ name ^ "' not declared")
+            in
+            raise failure
+
+    method attribute_violates_standalone_declaration attname v =
+      (* Returns true if the attribute 'attname' was added with
+       * extdecl=true and
+       * - no value is given (v = None), and the declaration has a default
+       *   or fixed value, or
+       * - a value is given (v = Some s), the attribute type is not CDATA,
+       *   and normalization changes the value s.
+       * If the attribute is not declared, Undeclared is raised when
+       * arbitrary attributes are allowed, and Validation_error otherwise.
+       *)
+      try
+        let (atype, adefault), extdecl = List.assoc attname attributes in
+        if not extdecl then
+          false
+        else
+          ( match v with
+                None ->
+                  (* i.e. adefault matches D_default or D_fixed *)
+                  not (adefault = D_required || adefault = D_implied)
+              | Some s ->
+                  if atype = A_cdata then
+                    false
+                  else
+                    normalization_changes_value lexerset atype s
+          )
+      with
+          Not_found ->
+            let failure =
+              if allow_arbitrary then
+                Undeclared
+              else
+                Validation_error("Attribute `" ^ attname ^ "' of element `"
+                                 ^ name ^ "' not declared")
+            in
+            raise failure
+
+
+    method attribute_names =
+      (* the names of all declared attributes *)
+      List.map (fun (att_name, _) -> att_name) attributes
+
+    method names_of_required_attributes =
+      (* the names of the attributes declared as #REQUIRED *)
+      let is_required (_, ((_, default), _)) = (default = D_required) in
+      List.map fst (List.filter is_required attributes)
+
+    method id_attribute_name =
+      (* the name of the attribute of type ID added last, if any *)
+      id_att_name
+
+    method idref_attribute_names =
+      (* the names of the attributes of type IDREF or IDREFS *)
+      idref_att_names
+
+
+    method write os enc =
+      (* Writes the declaration "<!ELEMENT name contentspec>" followed by
+       * "<!ATTLIST name ...>" to 'os', converting the text from the
+       * encoding of the DTD to the encoding 'enc'.
+       * Fails (Failure) if the content model is Unspecified or contains an
+       * empty choice or sequence, or if a NOTATION or enumerated attribute
+       * type has no values.
+       *)
+      let encoding = self # encoding in
+      let wms =
+        write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      let rec write_contentspec cs =
+        match cs with
+            Unspecified ->
+              failwith "#write: Unspecified content model found"
+          | Empty ->
+              wms "EMPTY"
+          | Any ->
+              wms "ANY"
+          | Mixed ml ->
+              wms "(";
+              write_mixedspec_list ml;
+              wms ")*"
+          | Regexp re ->
+              write_children re false
+
+      and write_mixedspec_list ml =
+        match ml with
+            MPCDATA :: ml' ->
+              wms "#PCDATA";
+              if ml' <> [] then wms "|";
+              write_mixedspec_list ml'
+          | MChild s :: ml' ->
+              wms s;
+              if ml' <> [] then wms "|";
+              write_mixedspec_list ml'
+          | [] ->
+              ()
+
+      (* 'cp' is true if 're' is written as member of a choice or a
+       * sequence; a child name outside of such a group must be put into
+       * parentheses.
+       *)
+      and write_children re cp =
+        match re with
+            Optional re' ->
+              let p = needs_parens re' in
+              if p then wms "(";
+              write_children re' cp;
+              if p then wms ")";
+              wms "?"
+          | Repeated re' ->
+              let p = needs_parens re' in
+              if p then wms "(";
+              write_children re' cp;
+              if p then wms ")";
+              wms "*"
+          | Repeated1 re' ->
+              let p = needs_parens re' in
+              if p then wms "(";
+              write_children re' cp;
+              if p then wms ")";
+              wms "+"
+          | Alt re' ->
+              wms "(";
+              ( match re' with
+                    re1' :: rer' ->
+                      write_children re1' true;
+                      List.iter
+                        (fun ren' ->
+                           wms "|";
+                           write_children ren' true;
+                        )
+                        rer'
+                  | [] ->
+                      failwith "#write: Illegal content model"
+              );
+              wms ")"
+          | Seq re' ->
+              wms "(";
+              ( match re' with
+                    re1' :: rer' ->
+                      write_children re1' true;
+                      List.iter
+                        (fun ren' ->
+                           wms ",";
+                           write_children ren' true;
+                        )
+                        rer'
+                  | [] ->
+                      failwith "#write: Illegal content model"
+              );
+              wms ")"
+          | Child ch ->
+              if not cp then wms "(";
+              wms ch;
+              if not cp then wms ")"
+
+      (* An occurrence indicator cannot follow another one directly *)
+      and needs_parens re =
+        match re with
+            (Optional _ | Repeated _ | Repeated1 _ ) -> true
+          | _ -> false
+      in
+
+      wms ("<!ELEMENT " ^ name ^ " ");
+      write_contentspec content_model;
+      wms ">\n";
+
+      wms ("<!ATTLIST " ^ name);
+      List.iter
+        (fun (n,((t,d),_)) ->
+           wms ("\n " ^ n);
+           ( match t with
+                 A_cdata    -> wms " CDATA"
+               | A_id       -> wms " ID"
+               | A_idref    -> wms " IDREF"
+               | A_idrefs   -> wms " IDREFS"
+               | A_entity   -> wms " ENTITY"
+               | A_entities -> wms " ENTITIES"
+               | A_nmtoken  -> wms " NMTOKEN"
+               | A_nmtokens -> wms " NMTOKENS"
+               | A_notation nl ->
+                   wms " NOTATION (";
+                   ( match nl with
+                         nl1:: nl' ->
+                           wms nl1;
+                           List.iter
+                             (fun n ->
+                                wms ("|" ^ n);
+                             )
+                             nl'
+                       | [] ->
+                           failwith "#write: Illegal content model"
+                   );
+                   wms ")"
+               | A_enum el ->
+                   wms " (";
+                   ( match el with
+                         el1:: el' ->
+                           wms el1;
+                           List.iter
+                             (fun e ->
+                                wms ("|" ^ e);
+                             )
+                             el'
+                       | [] ->
+                           failwith "#write: Illegal content model"
+                   );
+                   wms ")"
+           );
+           ( match d with
+                 D_required -> wms " #REQUIRED"
+               | D_implied  -> wms " #IMPLIED"
+               | D_default s ->
+                   wms " \"";
+                   write_data_string ~from_enc:encoding ~to_enc:enc os s;
+                   wms "\""
+               | D_fixed s ->
+                   (* The keyword is "#FIXED" (XML 1.0, production
+                    * DefaultDecl); without the "#" the declaration
+                    * cannot be parsed again.
+                    *)
+                   wms " #FIXED \"";
+                   write_data_string ~from_enc:encoding ~to_enc:enc os s;
+                   wms "\""
+           );
+        )
+        attributes;
+
+      wms ">\n";
+
+    method write_compact_as_latin1 os =
+      (* same as 'write' with ISO-8859-1 as output encoding *)
+      self # write os `Enc_iso88591
+
+ (************************************************************)
+ (* VALIDATION *)
+ (************************************************************)
+
+    method validate =
+      (* Checks first the attribute declarations and then the content
+       * model; raises Validation_error if a constraint is violated.
+       *)
+      self # validate_attributes();
+      self # validate_content_model()
+
+    method private validate_attributes() =
+      (* Checks the validity constraints for the attribute declarations of
+       * this element.  The result is remembered until the next attribute
+       * is added.
+       *)
+      if attributes_validated then
+        ()
+      else begin
+        (* Validity Constraint: One ID per Element Type *)
+        let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
+        if n > 1 then
+          raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
+        (* Validity Constraint: ID Attribute Default *)
+        if List.exists
+             (fun (n,((t,d),_)) ->
+                t = A_id && (d <> D_required && d <> D_implied))
+             attributes
+        then
+          raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
+        (* Validity Constraint: One Notation per Element Type *)
+        let n = count (fun (n,((t,d),_)) ->
+                         match t with A_notation _ -> true | _ -> false)
+                      attributes in
+        if n > 1 then
+          raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
+        (* Validity Constraint: Notation Attributes [second part] *)
+        List.iter
+          (fun (n,((t,d),_)) ->
+             match t with
+                 A_notation l ->
+                   List.iter
+                     (fun nname ->
+                        (* dtd # notation raises Validation_error if the
+                         * notation is not declared.  If the DTD allows
+                         * arbitrary notations, it raises Undeclared
+                         * instead, which is not a violation (same
+                         * handling as in dtd # validate).
+                         *)
+                        try ignore(dtd # notation nname)
+                        with Undeclared -> ())
+                     l
+               | _ -> ())
+          attributes;
+        (* Validity Constraint: Attribute Default Legal *)
+        List.iter
+          (fun (n,((t,d),_)) ->
+
+             let check v =
+               let lexical_error() =
+                 lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
+               check_attribute_value_lexically lexerset (lexical_error()) t v;
+               begin match t with
+                   (A_entity|A_entities) ->
+                     List.iter
+                       (fun nd ->
+                          let en, extdecl = dtd # gen_entity nd in
+                          if not (en # is_ndata) then
+                            raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+(* if dtd # standalone_declaration && extdecl then
+ raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+-- This is checked anyway when the attribute value is normalized
+*)
+                       )
+                       (split_attribute_value lexerset v)
+                 | A_notation nl ->
+                     if not (List.mem v nl) then
+                       raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+                 | A_enum nl ->
+                     if not (List.mem v nl) then
+                       raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+                 | _ -> ()
+               end
+             in
+
+             match d with
+                 D_required -> ()
+               | D_implied -> ()
+               | D_default v -> check v
+               | D_fixed v -> check v
+          )
+          attributes;
+
+        (* Ok: This element declaration is valid *)
+        attributes_validated <- true;
+
+      end
+
+    method private validate_content_model () =
+      (* checks:
+       * - Validity Constraint: No Duplicate Types
+       * It is not an error if there is a child in the declaration for which
+       * no element declaration is provided.
+       * An element without content model only causes a warning.
+       *)
+      match content_model with
+          Unspecified ->
+            dtd # warner # warn ("Element type `" ^ name ^ "' mentioned but not declared")
+        | Mixed (first :: children) ->
+            (* MPCDATA is always the first element by construction *)
+            assert (first = MPCDATA);
+            if check_dups children then
+              raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
+        | (Empty | Any | Regexp _) ->
+            ()
+        | _ ->
+            assert false
+
+
+
+ (************************************************************)
+
+ end
+
+and dtd_notation the_name the_xid init_encoding =
+object (self)
+  (* A notation declaration: the name of the notation, its external
+   * identifier, and the encoding of these strings.
+   *)
+  val name = the_name
+  val xid = (the_xid : ext_id)
+  val encoding = (init_encoding : Pxp_types.rep_encoding)
+  method name = name
+  method ext_id = xid
+  method encoding = encoding
+
+  method write os enc =
+    (* Writes the declaration "<!NOTATION name SYSTEM|PUBLIC ...>" to
+     * 'os', converting the text from 'encoding' to the encoding 'enc'.
+     * Fails (Failure) if the external identifier is Anonymous.
+     *)
+    let wms =
+      write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+    (* Writes a quoted literal; single quotes are used if the string
+     * itself contains a double quote.
+     *)
+    let write_sysid s =
+      if String.contains s '"' then
+        wms ("'" ^ s ^ "'")
+      else
+        wms ("\"" ^ s ^ "\"")
+    in
+
+    wms ("<!NOTATION " ^ name ^ " ");
+    ( match xid with
+          System s ->
+            wms "SYSTEM ";
+            write_sysid s
+        | Public (p,s) ->
+            wms "PUBLIC ";
+            write_sysid p;
+            (* the system identifier is optional for notations *)
+            if (s <> "") then begin
+              wms " ";
+              write_sysid s;
+            end
+        | Anonymous ->
+            failwith "#write: External ID Anonymous cannot be represented"
+    );
+    wms ">\n";
+
+  method write_compact_as_latin1 os =
+    (* same as 'write' with ISO-8859-1 as output encoding *)
+    self # write os `Enc_iso88591
+
+  end
+
+and proc_instruction the_target the_value init_encoding =
+object (self)
+  (* A processing instruction "<?target value?>".  Creating an
+   * instruction with the reserved target "xml" (in any capitalization)
+   * raises WF_error.
+   *)
+  val target = the_target
+  val value = (the_value : string)
+  val encoding = (init_encoding : Pxp_types.rep_encoding)
+
+  initializer
+    match target with
+        ("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") ->
+          (* This is an error, not a warning, because I do not have a
+           * "warner" object by hand.
+           *)
+          raise(WF_error("Reserved processing instruction"))
+      | _ -> ()
+
+  method target = target
+  method value = value
+  method encoding = encoding
+
+  method write os enc =
+    (* Writes "<?target value?>" to 'os', converting the text from
+     * 'encoding' to the encoding 'enc'.
+     *)
+    let wms =
+      write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+    wms "<?";
+    wms target;
+    wms " ";
+    wms value;
+    wms "?>";
+
+  method write_compact_as_latin1 os =
+    (* same as 'write' with ISO-8859-1 as output encoding *)
+    self # write os `Enc_iso88591
+
+  method parse_pxp_option =
+    (* Interprets the value as "optionname att1='...' att2='...' ..." and
+     * returns the triple (target, optionname, attributes).  Raises Error
+     * if the value does not have this form.
+     *)
+    let lexers = get_lexer_set encoding in
+    try
+      let toks = tokens_of_xml_pi lexers value in   (* may raise WF_error *)
+      begin match toks with
+          (Pro_name option_name) :: toks' ->
+            let atts = decode_xml_pi toks' in       (* may raise WF_error *)
+            (target, option_name, atts)
+        | _ ->
+            raise(Error("Bad PXP processing instruction"))
+      end
+    with
+        WF_error _ ->
+          raise(Error("Bad PXP processing instruction"))
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.10 2000/08/18 21:18:45 gerd
+ * Updated wrong comments for methods par_entity and gen_entity.
+ * These can raise WF_error and not Validation_error, and this is the
+ * correct behaviour.
+ *
+ * Revision 1.9 2000/07/25 00:30:01 gerd
+ * Added support for pxp:dtd PI options.
+ *
+ * Revision 1.8 2000/07/23 02:16:34 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.7 2000/07/16 17:50:01 gerd
+ * Fixes in 'write'
+ *
+ * Revision 1.6 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.5 2000/07/14 13:56:48 gerd
+ * Added methods id_attribute_name and idref_attribute_names.
+ *
+ * Revision 1.4 2000/07/09 00:13:37 gerd
+ * Added methods gen_entity_names, par_entity_names.
+ *
+ * Revision 1.3 2000/07/04 22:10:55 gerd
+ * Update: collect_warnings -> drop_warnings.
+ * Update: Case ext_id = Anonymous.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ *
+ * Revision 1.18 2000/05/28 17:24:55 gerd
+ * Bugfixes.
+ *
+ * Revision 1.17 2000/05/27 19:21:25 gerd
+ * Implemented the changes of rev. 1.10 of markup_dtd.mli.
+ *
+ * Revision 1.16 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.15 2000/05/14 21:50:07 gerd
+ * Updated: change in internal_entity.
+ *
+ * Revision 1.14 2000/05/06 23:08:46 gerd
+ * It is possible to allow undeclared attributes.
+ *
+ * Revision 1.13 2000/05/01 20:42:46 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.12 2000/05/01 15:16:57 gerd
+ * The errors "undeclared parameter/general entities" are
+ * well-formedness errors, not validation errors.
+ *
+ * Revision 1.11 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.10 2000/01/20 20:53:47 gerd
+ * Changed such that it runs with Markup_entity's new interface.
+ *
+ * Revision 1.9 1999/11/09 22:15:41 gerd
+ * Added method "arbitrary_allowed".
+ *
+ * Revision 1.8 1999/09/01 22:52:22 gerd
+ * If 'allow_arbitrary' is in effect, no validation happens anymore.
+ *
+ * Revision 1.7 1999/09/01 16:21:24 gerd
+ * Added several warnings.
+ * The attribute type of "xml:space" is now strictly checked.
+ *
+ * Revision 1.6 1999/08/15 20:34:21 gerd
+ * Improved error messages.
+ * Bugfix: It is no longer allowed to create processing instructions
+ * with target "xml".
+ *
+ * Revision 1.5 1999/08/15 02:20:16 gerd
+ * New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.4 1999/08/15 00:21:39 gerd
+ * Comments have been updated.
+ *
+ * Revision 1.3 1999/08/14 22:12:52 gerd
+ * Several functions have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Bugfix: if two general entities with the same name are defined,
+ * the first counts, not the second.
+ *
+ * Revision 1.2 1999/08/11 14:56:35 gerd
+ * Declaration of the predefined entities {lt,gt,amp,quot,apos}
+ * is no longer forbidden; but the original definition cannot be overridden.
+ * TODO: If these entities are redeclared with problematic values,
+ * the user should be warned.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dtd.mli b/helm/DEVEL/pxp/pxp/pxp_dtd.mli
new file mode 100644
index 000000000..1c347fbce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_dtd.mli
@@ -0,0 +1,476 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(*$ markup-dtd1.mli *)
+
+(**********************************************************************)
+(* *)
+(* Pxp_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+ (* Creation:
+ * new dtd the_warner init_encoding
+ * creates a new, empty DTD object without any declaration, without a root
+ * element, without an ID.
+ *)
+ Pxp_types.collect_warnings ->
+ Pxp_types.rep_encoding ->
+ object
+ method root : string option
+ (* get the name of the root element if present *)
+
+ method set_root : string -> unit
+ (* set the name of the root element. This method can be invoked
+ * only once
+ *)
+
+ method id : Pxp_types.dtd_id option
+ (* get the identifier for this DTD *)
+
+ method set_id : Pxp_types.dtd_id -> unit
+ (* set the identifier. This method can be invoked only once *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding used for character representation *)
+
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - elements and notations that have not been added may be used in an
+ * arbitrary way; the methods "element" and "notation" indicate this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary contents are allowed or not. *)
+
+ method standalone_declaration : bool
+ (* Whether there is a 'standalone' declaration or not. Strictly
+ * speaking, this declaration is not part of the DTD, but it is
+ * included here because of practical reasons.
+ * If not set, this property defaults to 'false'.
+ *)
+
+ method set_standalone_declaration : bool -> unit
+ (* Sets the 'standalone' declaration. *)
+
+
+ method add_element : dtd_element -> unit
+ (* add the given element declaration to this DTD. Raises Not_found
+ * if there is already an element declaration with the same name.
+ *)
+
+ method add_gen_entity : Pxp_entity.entity -> bool -> unit
+ (* add_gen_entity e extdecl:
+ * add the entity 'e' as general entity to this DTD (general entities
+ * are those represented by &name;). If there is already a declaration
+ * with the same name, the second definition is ignored; as exception from
+ * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+ * may only be redeclared with a definition that is equivalent to the
+ * standard definition; otherwise a Validation_error is raised.
+ *
+ * 'extdecl': 'true' indicates that the entity declaration occurs in
+ * an external entity. (Used for the standalone check.)
+ *)
+
+ method add_par_entity : Pxp_entity.entity -> unit
+ (* add the given entity as parameter entity to this DTD (parameter
+ * entities are those represented by %name;). If there is already a
+ * declaration with the same name, the second definition is ignored.
+ *)
+
+ method add_notation : dtd_notation -> unit
+ (* add the given notation to this DTD. If there is already a declaration
+ * with the same name, a Validation_error is raised.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* add the given processing instruction to this DTD. *)
+
+ method element : string -> dtd_element
+ (* looks up the element declaration with the given name. Raises
+ * Validation_error if the element cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method element_names : string list
+ (* returns the list of the names of all element declarations. *)
+
+ method gen_entity : string -> (Pxp_entity.entity * bool)
+ (* let e, extdecl = obj # gen_entity n:
+ * looks up the general entity 'e' with the name 'n'. Raises
+ * WF_error if the entity cannot be found.
+ * 'extdecl': indicates whether the entity declaration occurred in an
+ * external entity.
+ *)
+
+ method gen_entity_names : string list
+ (* returns the list of all general entity names *)
+
+ method par_entity : string -> Pxp_entity.entity
+ (* looks up the parameter entity with the given name. Raises
+ * WF_error if the entity cannot be found.
+ *)
+
+ method par_entity_names : string list
+ (* returns the list of all parameter entity names *)
+
+ method notation : string -> dtd_notation
+ (* looks up the notation declaration with the given name. Raises
+ * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method notation_names : string list
+ (* Returns the list of the names of all added notations *)
+
+ method pinstr : string -> proc_instruction list
+ (* looks up all processing instructions with the given target.
+ * The "target" is the identifier following "<?".
+ * Note: It is not possible to find out the exact position of the
+ * processing instruction.
+ *)
+
+ method pinstr_names : string list
+ (* Returns the list of the names (targets) of all added pinstrs *)
+
+ method validate : unit
+ (* ensures that the DTD is valid. This method is optimized such that
+ * actual validation is only performed if DTD has changed.
+ * If the DTD is invalid, mostly a Validation_error is raised,
+ * but other exceptions are possible, too.
+ *)
+
+ method only_deterministic_models : unit
+ (* Succeeds if all regexp content models are deterministic.
+ * Otherwise Validation_error.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+ (* write os enc doctype:
+ * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
+ * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+ * only the declarations are written (the material within the
+ * square brackets).
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+
+ (*----------------------------------------*)
+ method invalidate : unit
+ (* INTERNAL METHOD *)
+ method warner : Pxp_types.collect_warnings
+ (* INTERNAL METHOD *)
+ end
+
+(*$-*)
+
+(*$ markup-dtd2.mli *)
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string ->
+ (* Creation:
+ * new dtd_element init_dtd init_name:
+ * creates a new dtd_element object for init_dtd with init_name.
+ * The strings are represented in the same encoding as init_dtd.
+ *)
+ object
+
+ method name : string
+ (* returns the name of the declared element *)
+
+ method externally_declared : bool
+ (* returns whether the element declaration occurs in an external
+ * entity.
+ *)
+
+ method content_model : Pxp_types.content_model_type
+ (* get the content model of this element declaration, or Unspecified *)
+
+ method content_dfa : Pxp_dfa.dfa_definition option
+ (* return the DFA of the content model if there is a DFA, or None.
+ * A DFA exists only for regexp style content models which are
+ * deterministic.
+ *)
+
+ method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+ (* set_cm_and_extdecl cm extdecl:
+ * set the content model to 'cm'. Once the content model is not
+ * Unspecified, it cannot be set to a different value again.
+ * Furthermore, it is set whether the element occurs in an external
+ * entity ('extdecl').
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Return the encoding of the strings *)
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - attributes that have not been added may be used in an
+ * arbitrary way; the method "attribute" indicates this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary attributes are allowed or not. *)
+
+ method attribute : string ->
+ Pxp_types.att_type * Pxp_types.att_default
+ (* get the type and default value of a declared attribute, or raise
+ * Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_violates_standalone_declaration :
+ string -> string option -> bool
+ (* attribute_violates_standalone_declaration name v:
+ * Checks whether the attribute 'name' violates the "standalone"
+ * declaration if it has value 'v'.
+ * The method returns true if:
+ * - The attribute declaration occurs in an external entity,
+ * and if one of the two conditions holds:
+ * - v = None, and there is a default for the attribute value
+ * - v = Some s, and the type of the attribute is not CDATA,
+ * and s changes if normalized according to the rules of the
+ * attribute type.
+ *
+ * The method raises Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_names : string list
+ (* get the list of all declared attributes *)
+
+ method names_of_required_attributes : string list
+ (* get the list of all attributes that are specified as required
+ * attributes
+ *)
+
+ method id_attribute_name : string option
+ (* Returns the name of the attribute with type ID, or None. *)
+
+ method idref_attribute_names : string list
+ (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+ method add_attribute : string ->
+ Pxp_types.att_type ->
+ Pxp_types.att_default ->
+ bool ->
+ unit
+ (* add_attribute name type default extdecl:
+ * add an attribute declaration for an attribute with the given name,
+ * type, and default value. If there is more than one declaration for
+ * an attribute name, the first declaration counts; the other declarations
+ * are ignored.
+ * 'extdecl': if true, the attribute declaration occurs in an external
+ * entity. This property is used to check the "standalone" attribute.
+ *)
+
+ method validate : unit
+ (* checks whether this element declaration (i.e. the content model and
+ * all attribute declarations) is valid for the associated DTD.
+ * Raises mostly Validation_error if the validation fails.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the declaration to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new dtd_notation a_name an_external_ID init_encoding
+ * creates a new dtd_notation object with the given name and the given
+ * external ID.
+ *)
+ object
+ method name : string
+ method ext_id : Pxp_types.ext_id
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the declaration to 'os' as 'enc'-encoded
+ * string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new proc_instruction a_target a_value init_encoding
+ * creates a new proc_instruction object with the given target string and
+ * the given value string.
+ * Note: A processing instruction is written as <?target value?>.
+ *)
+ object
+ method target : string
+ method value : string
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ method parse_pxp_option : (string * string * (string * string) list)
+ (* Parses a PI containing a PXP option. Such PIs are formed like:
+ * <?target option-name option-att="value" option-att="value" ... ?>
+ * The method returns a triple
+ * (target, option-name, [option-att, value; ...])
+ * or raises Error.
+ *)
+
+ end
+
+;;
+
+(*$-*)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/18 21:18:45 gerd
+ * Updated wrong comments for methods par_entity and gen_entity.
+ * These can raise WF_error and not Validation_error, and this is the
+ * correct behaviour.
+ *
+ * Revision 1.7 2000/07/25 00:30:01 gerd
+ * Added support for pxp:dtd PI options.
+ *
+ * Revision 1.6 2000/07/23 02:16:33 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.5 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.4 2000/07/14 13:56:49 gerd
+ * Added methods id_attribute_name and idref_attribute_names.
+ *
+ * Revision 1.3 2000/07/09 00:13:37 gerd
+ * Added methods gen_entity_names, par_entity_names.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_dtd.ml:
+ *
+ * Revision 1.11 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.10 2000/05/27 19:20:38 gerd
+ * Changed the interfaces for the standalone check: New
+ * methods: standalone_declaration, set_standalone_declaration,
+ * externally_declared, attribute_violates_standalone_declaration.
+ * The method set_content_model has been renamed to
+ * set_cm_and_extdecl; it now initializes also whether the element
+ * has been declared in an external entity.
+ * Methods add_gen_entity and gen_entity pass an additional
+ * boolean argument containing whether the declaration of the
+ * general entity happened in an external entity.
+ * Method add_attribute expects this argument, too, which
+ * states whether the declaration of the attribute happened in an
+ * external entity.
+ *
+ * Revision 1.9 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.8 2000/05/06 23:10:26 gerd
+ * allow_arbitrary for elements, too.
+ *
+ * Revision 1.7 2000/05/01 20:42:52 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.6 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.5 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.4 1999/11/09 22:15:41 gerd
+ * Added method "arbitrary_allowed".
+ *
+ * Revision 1.3 1999/09/01 16:21:56 gerd
+ * "dtd" classes have now an argument that passes a "warner".
+ *
+ * Revision 1.2 1999/08/15 02:20:23 gerd
+ * New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_entity.ml b/helm/DEVEL/pxp/pxp/pxp_entity.ml
new file mode 100644
index 000000000..94b21aefe
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_entity.ml
@@ -0,0 +1,1292 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+(* TODO:
+ * - Wie verhindert man, dass ein internal entity eine XML-Dekl. im
+ * replacement text akzeptiert?
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_aux
+open Pxp_reader
+
+(* Hierarchy of parsing layers:
+ *
+ * - Parser: Pxp_yacc
+ * + gets input stream from the main entity object
+ * + checks most of the grammar
+ * + creates the DTD object as side-effect
+ * + creates the element tree as side-effect
+ * + creates further entity objects that are entered into the DTD
+ * - Entity layer: Pxp_entity
+ * + gets input stream from the lexers, or another entity object
+ * + handles entity references: if a reference is encountered the
+ * input stream is redirected such that the tokens come from the
+ * referenced entity object
+ * + handles conditional sections
+ * - Lexer layer: Pxp_lexers
+ * + gets input from lexbuffers created by resolvers
+ * + different lexers for different lexical contexts
+ * + a lexer returns pairs (token,lexid), where token is the scanned
+ * token, and lexid is the name of the lexer that must be used for
+ * the next token
+ * - Resolver layer: Pxp_entity
+ * + a resolver creates the lexbuf from some character source
+ * + a resolver recodes the input and handles the encoding scheme
+ *)
+
+(**********************************************************************)
+
+(* Variables of type 'state' are used to insert Begin_entity and End_entity
+ * tokens into the stream.
+ * - At_beginning: Nothing has been read so far
+ * - Inserted_begin_entity: presumably a Begin_entity has been inserted and
+ *   Eof has not been read yet -- TODO confirm. (Begin_entity/End_entity
+ *   must not be inserted if the entity is empty.) NOTE(review): the states
+ *   "First_token tok" and "In_stream" once listed here are not declared.
+ * - At_end: Eof has been read, and End_entity has been returned.
+ *)
+
+type state =
+ At_beginning
+ | Inserted_begin_entity
+ | At_end
+;;
+
+
+(**********************************************************************)
+
+class virtual entity the_dtd the_name the_warner
+ init_errors_with_line_numbers init_encoding =
+ object (self)
+ (* This class prescribes the type of all entity objects. Furthermore,
+ * the default 'next_token' mechanism is implemented.
+ *)
+
+ (* 'init_errors_with_line_numbers': whether error messages contain line
+ * numbers or not.
+ * Calculating line numbers is expensive.
+ *)
+
+ val mutable dtd = the_dtd
+ val mutable name = the_name
+ val mutable warner = the_warner
+
+ val encoding = (init_encoding : rep_encoding)
+ val lexerset = Pxp_lexers.get_lexer_set init_encoding
+
+ method encoding = encoding
+ (* method lexerset = lexerset *)
+
+ val mutable manager = None
+ (* The current entity_manager, see below *)
+
+ method private manager =
+ ( match manager with
+ None -> assert false
+ | Some m -> m
+ : < current_entity : entity;
+ pop_entity : unit;
+ push_entity : entity -> unit >
+ )
+
+ method set_manager m = manager <- Some m
+
+
+ val mutable lexbuf = Lexing.from_string ""
+ (* The lexical buffer currently used as character source. *)
+
+ val mutable prolog = None
+ (* Stores the initial token as PI_xml *)
+
+ val mutable prolog_pairs = []
+ (* If prolog <> None, these are the (name,value) pairs of the
+ * processing instruction.
+ *)
+
+
+ val mutable lex_id = Document
+ (* The name of the lexer that should be used for the next token *)
+
+ method set_lex_id id = lex_id <- id
+ (* Selects 'id' as the lexer that scans the next token of this entity *)
+
+
+ val mutable force_parameter_entity_parsing = false
+ (* 'true' forces that inner entities will always be embraced by
+ * Begin_entity and End_entity.
+ * 'false': the inner entity itself decides this
+ *)
+
+ val mutable check_text_declaration = true
+ (* 'true': It is checked that the declaration matches the
+ * production TextDecl.
+ *)
+
+ val mutable normalize_newline = true
+ (* Whether this entity converts CRLF or CR to LF, or not *)
+
+
+ val mutable line = 1 (* current line *)
+ val mutable column = 0 (* current column *)
+ val mutable pos = 0 (* current absolute character position *)
+ val errors_with_line_numbers = init_errors_with_line_numbers
+
+ val mutable p_line = 1
+ val mutable p_column = 1
+
+ method line = p_line
+ method column = p_column
+
+
+ val mutable counts_as_external = false
+
+ method counts_as_external = counts_as_external
+ (* Whether the entity counts as external (for the standalone check). *)
+
+ method set_counts_as_external =
+ counts_as_external <- true
+
+
+ val mutable last_token = Bof
+ (* XXX
+ * These two variables are used to check that between certain pairs of
+ * tokens whitespaces exist. 'last_token' is simply the last token,
+ * but not Ignore, and not PERef (which both represent whitespace).
+ * 'space_seen' records whether Ignore or PERef was seen between this
+ * token and 'last_token'.
+ *)
+
+ val mutable deferred_token = None
+ (* If you set this to Some tl, the next invocations of
+ * next_token will return the tokens in tl.
+ * This makes it possible to insert tokens into the stream.
+ *)
+
+ val mutable debug = false
+
+ method is_ndata = false
+ (* Returns if this entity is an NDATA (unparsed) entity *)
+
+ method name = name
+
+ method virtual open_entity : bool -> lexers -> unit
+ (* open_entity force_parsing lexid:
+ * opens the entity, and the first token is scanned by the lexer
+ * 'lexid'. 'force_parsing' forces that Begin_entity and End_entity
+ * tokens embrace the inner tokens of the entity; otherwise this
+ * depends on the entity.
+ * By opening an entity, reading tokens from it, and finally closing
+ * the entity, the inclusion methods "Included",
+ * "Included if validating", and "Included as PE" can be carried out.
+ * Which method is chosen depends on the 'lexid', i.e. the lexical
+ * context: 'lexid = Content' performs "Included (if validating)" (we
+ * are always validating); 'lexid = Declaration' performs
+ * "Included as PE". The difference is which tokens are recognized,
+ * and how spaces are handled.
+ * 'force_parsing' causes that a Begin_entity token is inserted before
+ * and an End_entity token is inserted after the entity. The yacc
+ * rules allow the Begin_entity ... End_entity brace only at certain
+ * positions; this is used to restrict the possible positions where
+ * entities may be included, and to guarantee that the entity matches
+ * a certain production of the grammar ("parsed entities").
+ * 'open_entity' is currently invoked with 'force_parsing = true'
+ * for toplevel nodes, for inclusion of internal general entities,
+ * and for inclusion of parameter entities into document entities.
+ * 'force_parsing = false' is used for all other cases: External
+ * entities add the Begin_entity/End_entity tokens anyway; internal
+ * entities do not. Especially internal parameter entities referenced
+ * from non-document entities do not add these tokens.
+ *)
+
+ method virtual close_entity : lexers
+ (* close_entity:
+ * closes the entity and returns the name of the lexer that must
+ * be used to scan the next token.
+ *)
+
+ method virtual replacement_text : (string * bool)
+ (* replacement_text:
+ * returns the replacement text of the entity, and as second value,
+ * whether the replacement text was constructed by referencing
+ * external entities (directly or indirectly).
+ * This method implements the inclusion method "Included in Literal".
+ *)
+
+
+ method lexbuf = lexbuf
+
+
+ method xml_declaration =
+ (* The (name,value) pairs of the initial processing instruction, or
+ * None if no such instruction has been recorded in 'prolog'.
+ *)
+ match prolog with
+ Some _ ->
+ Some prolog_pairs
+ | None ->
+ None
+
+
+ method set_debugging_mode m =
+ debug <- m
+
+ method private virtual set_encoding : string -> unit
+
+
+ method full_name =
+ name
+
+
+ method next_token =
+ (* read next token from this entity: deferred tokens are returned first;
+ * otherwise the lexer selected by 'lex_id' scans 'lexbuf' *)
+ match deferred_token with
+ Some toklist ->
+ ( match toklist with
+ [] ->
+ deferred_token <- None;
+ self # next_token
+ | tok :: toklist' ->
+ deferred_token <- Some toklist';
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (deferred)");
+ tok
+ )
+ | None -> begin
+ let this_line = line
+ and this_column = column in
+ (* 'line' and 'column' report the start position of the token *)
+ p_line <- this_line;
+ p_column <- this_column;
+ (* Read the next token from the appropriate lexer lex_id, and get the
+ * name lex_id' of the next lexer to be used.
+ *)
+ let tok, lex_id' =
+ match lex_id with
+ Document -> lexerset.scan_document lexbuf
+ | Document_type -> lexerset.scan_document_type lexbuf
+ | Content -> lexerset.scan_content lexbuf
+ | Within_tag -> lexerset.scan_within_tag lexbuf
+ | Declaration -> lexerset.scan_declaration lexbuf
+ | Content_comment -> lexerset.scan_content_comment lexbuf
+ | Decl_comment -> lexerset.scan_decl_comment lexbuf
+ | Document_comment -> lexerset.scan_document_comment lexbuf
+ | Ignored_section -> assert false
+ (* Ignored_section: only used by method next_ignored_token *)
+ in
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok);
+ (* Find out the number of lines and characters of the last line: *)
+ let n_lines, n_columns =
+ if errors_with_line_numbers then
+ count_lines (Lexing.lexeme lexbuf)
+ else
+ 0, (Lexing.lexeme_end lexbuf - Lexing.lexeme_start lexbuf)
+ in
+ line <- this_line + n_lines;
+ column <- if n_lines = 0 then this_column + n_columns else n_columns;
+ pos <- Lexing.lexeme_end lexbuf;
+ lex_id <- lex_id';
+ (* Throw Ignore and Comment away; Interpret entity references: *)
+ (* NOTE: Of course, references to general entities are not allowed
+ * everywhere; parameter references, too. This is already done by the
+ * lexers, i.e. &name; and %name; are recognized only where they
+ * are allowed.
+ *)
+
+ (* TODO: last_token is only used to detect Bof. Can be simplified *)
+
+ let at_bof = (last_token = Bof) in
+ last_token <- tok;
+
+ let tok' =
+ match tok with
+
+ (* Entity references: *)
+
+ | ERef n ->
+ let en, extdecl = dtd # gen_entity n in
+ if dtd # standalone_declaration && extdecl then
+ raise
+ (Validation_error
+ ("Reference to entity `" ^ n ^
+ "' violates standalone declaration"));
+ en # set_debugging_mode debug;
+ en # open_entity true lex_id;
+ self # manager # push_entity en;
+ en # next_token;
+ | PERef n ->
+ let en = dtd # par_entity n in
+ en # set_debugging_mode debug;
+ en # open_entity force_parameter_entity_parsing lex_id;
+ self # manager # push_entity en;
+ en # next_token;
+
+ (* Convert LineEnd to CharData *)
+ | LineEnd s ->
+ if normalize_newline then
+ CharData "\n"
+ else
+ CharData s
+
+ (* Also normalize CDATA sections *)
+ | Cdata value as cd ->
+ if normalize_newline then
+ Cdata(normalize_line_separators lexerset value)
+ else
+ cd
+
+ (* If there are CRLF sequences in a PI value, normalize them, too *)
+ | PI(name,value) as pi ->
+ if normalize_newline then
+ PI(name, normalize_line_separators lexerset value)
+ else
+ pi
+
+ (* Attribute values: If they are already normalized, they are turned
+ * into Attval_nl_normalized. This is detected by other code.
+ *)
+ | Attval value as av ->
+ if normalize_newline then
+ av
+ else
+ Attval_nl_normalized value
+
+ (* Another CRLF normalization case: Unparsed_string *)
+ | Unparsed_string value as ustr ->
+ if normalize_newline then
+ Unparsed_string(normalize_line_separators lexerset value)
+ else
+ ustr
+
+ (* These tokens require that the entity_id parameter is set: *)
+ | Doctype _ -> Doctype (self :> entity_id)
+ | Doctype_rangle _ ->Doctype_rangle(self :> entity_id)
+ | Dtd_begin _ -> Dtd_begin (self :> entity_id)
+ | Dtd_end _ -> Dtd_end (self :> entity_id)
+ | Decl_element _ -> Decl_element (self :> entity_id)
+ | Decl_attlist _ -> Decl_attlist (self :> entity_id)
+ | Decl_entity _ -> Decl_entity (self :> entity_id)
+ | Decl_notation _ ->Decl_notation (self :> entity_id)
+ | Decl_rangle _ -> Decl_rangle (self :> entity_id)
+ | Lparen _ -> Lparen (self :> entity_id)
+ | Rparen _ -> Rparen (self :> entity_id)
+ | RparenPlus _ -> RparenPlus (self :> entity_id)
+ | RparenStar _ -> RparenStar (self :> entity_id)
+ | RparenQmark _ -> RparenQmark (self :> entity_id)
+ | Conditional_begin _ -> Conditional_begin (self :> entity_id)
+ | Conditional_body _ -> Conditional_body (self :> entity_id)
+ | Conditional_end _ -> Conditional_end (self :> entity_id)
+ | Tag_beg (n,_) -> Tag_beg (n, (self :> entity_id))
+ | Tag_end (n,_) -> Tag_end (n, (self :> entity_id))
+
+ (* End of file: *)
+
+ | Eof ->
+ if debug then begin
+ prerr_endline ("- Entity " ^ name ^ " # handle_eof");
+ let tok = self # handle_eof in
+ prerr_endline ("- Entity " ^ name ^ " # handle_eof: returns " ^ string_of_tok tok);
+ tok
+ end
+ else
+ self # handle_eof;
+
+ (* The default case. *)
+
+ | _ ->
+ tok
+
+ in
+ if at_bof && tok <> Eof
+ then begin
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ " # handle_bof");
+ self # handle_bof tok'
+ end
+ else
+ tok'
+ end
+
+
+ (* 'handle_bof' and 'handle_eof' can be used as hooks. Behaviour:
+ *
+ * - Normally, the first token t is read in, and 'handle_bof t' is
+ * called. The return value of this method is what is returned to
+ * the user.
+ * - If the EOF has been reached, 'handle_eof' is called.
+ * - BUT: If the first token is already EOF, 'handle_eof' is called
+ * ONLY, and 'handle_bof' is NOT called.
+ *
+ * The default implementations:
+ * - handle_bof: does nothing
+ * - handle_eof: Pops the entity stack of the manager, closes this entity,
+ * and returns the next token of the then-current entity (or Eof if none).
+ *)
+
+
+ method private handle_bof tok =
+ tok
+
+
+ method private handle_eof =
+ let mng = self # manager in
+ begin try
+ mng # pop_entity;
+ let next_lex_id = self # close_entity in
+ let en = mng # current_entity in
+ en # set_lex_id next_lex_id;
+ en # next_token
+ with
+ Stack.Empty ->
+ (* The outermost entity is at EOF *)
+ Eof
+ end
+
+
+ method next_ignored_token =
+ (* used after Conditional_begin (self :> entity_id)
+ | Conditional_end _ -> Conditional_end (self :> entity_id)
+ | _ -> tok
+
+
+ method process_xmldecl pl =
+ (* The parser calls this method just after the XML declaration
+ * has been detected.
+ * 'pl': This is the argument of the PI_xml token.
+ * The declaration is decoded into (name,value) pairs, optionally checked
+ * against the production TextDecl, and the value of the "encoding" pair
+ * (or "" if there is none) is passed to set_encoding.
+ *)
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ " # process_xmldecl");
+ prolog <- Some pl;
+ prolog_pairs <- decode_xml_pi pl;
+ if check_text_declaration then
+ check_text_xml_pi prolog_pairs;
+ (* Only the lookup is guarded: a Not_found escaping from set_encoding
+ * must not be mistaken for a missing "encoding" pair.
+ *)
+ let e = try List.assoc "encoding" prolog_pairs with Not_found -> "" in
+ self # set_encoding e
+
+
+ method process_missing_xmldecl =
+ (* The parser calls this method if the XML declaration is missing *)
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ " # process_missing_xmldecl");
+ self # set_encoding ""
+
+
+ (* Methods for NDATA entities only: *)
+ method ext_id = (assert false : ext_id)
+ method notation = (assert false : string)
+
+ end
+;;
+
+
+class ndata_entity the_name the_ext_id the_notation init_encoding =
+  (* An NDATA entity is very restricted: more or less, the only things
+   * that can be asked for are its name, its external ID, its notation,
+   * its encoding, and whether it counts as external. Every method that
+   * would treat the entity as a source of tokens raises Validation_error
+   * instead.
+   *)
+
+  (* Builds the exception raised by all methods that must not be used
+   * with NDATA entities.
+   *)
+  let violation entity_name =
+    Validation_error ("Invalid reference to NDATA entity " ^ entity_name)
+  in
+
+  object (self)
+
+    val mutable name = the_name
+    val mutable ext_id = the_ext_id
+    val mutable notation = the_notation
+    val encoding = (init_encoding : rep_encoding)
+    val mutable counts_as_external = false
+
+    (* ---- Queries that are meaningful for NDATA entities ---- *)
+
+    method name = (name : string)
+    method ext_id = (ext_id : ext_id)
+    method notation = (notation : string)
+    method is_ndata = true
+    method encoding = encoding
+
+    method counts_as_external = counts_as_external
+      (* Whether the entity counts as external (for the standalone check). *)
+
+    method set_counts_as_external =
+      counts_as_external <- true
+
+    method xml_declaration = (None : (string*string) list option)
+
+    method set_debugging_mode (_:bool) = ()
+
+    method private set_encoding (_:string) =
+      assert false
+
+    (* ---- Everything else is an invalid reference ---- *)
+
+    method set_manager (m : < current_entity : entity;
+                              pop_entity : unit;
+                              push_entity : entity -> unit >) =
+      (raise (violation name) : unit)
+
+    method set_lex_id (id : lexers) =
+      (raise (violation name) : unit)
+
+    method line =
+      (raise (violation name) : int)
+
+    method column =
+      (raise (violation name) : int)
+
+    method full_name =
+      (raise (violation name) : string)
+
+    method open_entity (_:bool) (_:lexers) =
+      (raise (violation name) : unit)
+
+    method close_entity =
+      (raise (violation name) : lexers)
+
+    method replacement_text =
+      (raise (violation name) : (string * bool))
+
+    method lexbuf =
+      (raise (violation name) : Lexing.lexbuf)
+
+    method next_token =
+      (raise (violation name) : token)
+
+    method next_ignored_token =
+      (raise (violation name) : token)
+
+    method process_xmldecl (pl:prolog_token list) =
+      (raise (violation name) : unit)
+
+    method process_missing_xmldecl =
+      (raise (violation name) : unit)
+
+  end
+;;
+
+
+class external_entity the_resolver the_dtd the_name the_warner the_ext_id
+                      the_p_special_empty_entities
+                      init_errors_with_line_numbers
+                      init_encoding
+  =
+  object (self)
+    inherit entity
+              the_dtd the_name the_warner init_errors_with_line_numbers
+              init_encoding
+            as super
+
+    (* An external entity gets the lexbuf that is used as character source
+     * from a resolver.
+     * Furthermore, before the first token a Begin_entity is inserted, and
+     * before Eof an End_entity token is inserted into the stream. This is
+     * always done, regardless of the argument 'force_parsing' of the method
+     * 'open_entity'.
+     *
+     * 'the_p_special_empty_entities': if true, a Begin_entity/End_entity
+     *    brace is left out if the entity is otherwise empty.
+     *)
+
+    val resolver = (the_resolver : resolver)
+    val ext_id = (the_ext_id : ext_id)
+
+    val p_special_empty_entities = (the_p_special_empty_entities : bool)
+
+    val mutable resolver_is_open = false
+      (* Track if the resolver is open. This is also used to find recursive
+       * references of entities.
+       *)
+
+    val mutable state = At_beginning
+
+    initializer
+      counts_as_external <- true;
+
+
+    method private set_encoding e =
+      (* Forwards the encoding name to the resolver, which must be open. *)
+      assert resolver_is_open;
+      resolver # change_encoding e
+
+
+    method full_name =
+      (* The entity name followed by a printable form of the external ID. *)
+      name ^
+      match ext_id with
+          System s    -> " = SYSTEM \"" ^ s ^ "\""
+        | Public(p,s) -> " = PUBLIC \"" ^ p ^ "\" \"" ^ s ^ "\""
+        | Anonymous   -> " = ANONYMOUS"
+
+
+    method open_entity force_parsing init_lex_id =
+      (* Note that external entities are always parsed, i.e. Begin_entity
+       * and End_entity tokens embrace the inner tokens to force that
+       * the entity is only called where the syntax allows it.
+       *
+       * Raises Validation_error if the entity is already open (recursive
+       * reference), and Error if the resolver cannot open the entity.
+       *)
+      if resolver_is_open then
+        raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      let lex =
+        try
+          resolver # open_in ext_id
+        with
+            Pxp_reader.Not_competent ->
+              raise(Error ("No input method available for this external entity: " ^
+                           self # full_name))
+          | Pxp_reader.Not_resolvable Not_found ->
+              raise(Error ("Unable to open the external entity: " ^
+                           self # full_name))
+          | Pxp_reader.Not_resolvable e ->
+              raise(Error ("Unable to open the external entity: " ^
+                           self # full_name ^ "; reason: " ^
+                           string_of_exn e))
+      in
+      resolver_is_open <- true;
+      lexbuf <- lex;
+      prolog <- None;
+      lex_id <- init_lex_id;
+      state <- At_beginning;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      last_token <- Bof;
+      normalize_newline <- true;
+
+
+    method private handle_bof tok =
+      (* This hook is only called if the stream is not empty. *)
+      deferred_token <- Some [ tok ];
+      state <- Inserted_begin_entity;
+      Begin_entity
+
+
+    method private handle_eof =
+      (* This hook is called if the end of the stream is reached *)
+      match state with
+          At_beginning ->
+            (* This is only possible if the stream is empty. *)
+            if p_special_empty_entities then begin
+              (* Continue immediately with the next token *)
+              state <- At_end;
+              super # handle_eof
+            end
+            else begin
+              (* Insert Begin_entity / End_entity *)
+              deferred_token <- Some [ End_entity ];
+              state <- At_end;
+              Begin_entity;
+              (* After these two tokens have been processed, the lexer
+               * is called again, and it will return another Eof.
+               *)
+            end
+        | Inserted_begin_entity ->
+            (* Insert End_entity, too. *)
+            state <- At_end;
+            End_entity;
+        | At_end ->
+            (* Continue with the next token: *)
+            super # handle_eof
+
+
+    method close_entity =
+      (* Closes the resolver and returns the current lexer id.
+       * Fails (Failure) if the entity is not open.
+       *)
+      if not resolver_is_open then
+        failwith ("External entity " ^ name ^ " not open");
+      resolver # close_in;
+      resolver_is_open <- false;
+      lex_id
+
+
+    method replacement_text =
+      (* Return the replacement text of the entity. The method used for this
+       * is more or less the same as for internal entities; i.e. character
+       * and parameter entities are resolved immediately. In addition to that,
+       * external entities may begin with an "xml" processing instruction
+       * which is considered not to be part of the replacement text.
+       *
+       * The second component of the result is always 'true'.
+       *
+       * Raises Validation_error if the entity is already open (recursive
+       * reference). If scanning fails after the resolver has been opened,
+       * the resolver is closed again before the exception is passed on;
+       * otherwise the entity would stay marked as open, and every later
+       * reference would be reported as a recursive one.
+       *)
+      if resolver_is_open then
+        raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      let lex = resolver # open_in ext_id in
+      resolver_is_open <- true;
+      let rtext =
+        try
+          lexbuf <- lex;
+          prolog <- None;
+          (* arbitrary:    lex_id <- init_lex_id; *)
+          state <- At_beginning;
+          line <- 1;
+          column <- 0;
+          pos <- 0;
+          last_token <- Bof;
+          (* First check if the first token of 'lex' is an XML declaration *)
+          begin match lexerset.scan_only_xml_decl lex with
+              PI_xml pl ->
+                self # process_xmldecl pl
+            | Eof ->
+                (* This only means that the first token was not an XML
+                 * declaration; the "Eof" token represents the empty string.
+                 *)
+                self # process_missing_xmldecl
+            | _ ->
+                (* Must not happen. *)
+                assert false
+          end;
+          (* Then create the replacement text. The pieces are collected in
+           * a buffer, in document order, by a tail-recursive loop.
+           *)
+          let b = Buffer.create 256 in
+          let rec scan_and_expand () =
+            match lexerset.scan_dtd_string lexbuf with
+                ERef n ->
+                  Buffer.add_string b ("&" ^ n ^ ";");
+                  scan_and_expand()
+              | CRef(-1) ->
+                  Buffer.add_string b "\n";
+                  scan_and_expand()
+              | CRef(-2) ->
+                  Buffer.add_string b "\n";
+                  scan_and_expand()
+              | CRef(-3) ->
+                  Buffer.add_string b "\n";
+                  scan_and_expand()
+              | CRef k ->
+                  Buffer.add_string b (character encoding warner k);
+                  scan_and_expand()
+              | CharData x ->
+                  Buffer.add_string b x;
+                  scan_and_expand()
+              | PERef n ->
+                  let en = dtd # par_entity n in
+                  let (x,_) = en # replacement_text in
+                  Buffer.add_string b x;
+                  scan_and_expand()
+              | Eof ->
+                  ()
+              | _ ->
+                  assert false
+          in
+          scan_and_expand();
+          Buffer.contents b
+        with
+            any ->
+              resolver_is_open <- false;
+              (* A secondary failure while closing must not hide the
+               * error that brought us here.
+               *)
+              (try resolver # close_in with _ -> ());
+              raise any
+      in
+      resolver # close_in;
+      resolver_is_open <- false;
+      rtext, true
+        (* TODO:
+         * - The replaced text is not parsed [VALIDATION WEAKNESS]
+         *)
+  end
+;;
+
+
+class document_entity the_resolver the_dtd the_name the_warner the_ext_id
+ init_errors_with_line_numbers
+ init_encoding
+ =
+ object (self)
+ (* The literal 'false' below is the argument 'the_p_special_empty_entities'
+  * of external_entity: the Begin_entity/End_entity pair is therefore
+  * produced even if the document entity is empty.
+  *)
+ inherit external_entity the_resolver the_dtd the_name the_warner
+ the_ext_id false init_errors_with_line_numbers
+ init_encoding
+
+ (* A document entity is an external entity that does not allow
+ * conditional sections, and that forces that internal parameter entities
+ * are properly nested.
+ *)
+
+ initializer
+ force_parameter_entity_parsing <- true;
+ (* With this flag off, process_xmldecl does not call check_text_xml_pi
+  * for the XML declaration of the document entity.
+  *)
+ check_text_declaration <- false;
+
+ method counts_as_external = false
+ (* Document entities never count as external! This override returns
+  * a constant and does not look at the instance variable
+  * 'counts_as_external', which the initializer of external_entity
+  * sets to true.
+  *)
+ end
+;;
+
+
+class internal_entity the_dtd the_name the_warner the_literal_value
+                      the_p_internal_subset init_errors_with_line_numbers
+                      init_is_parameter_entity
+                      init_encoding
+  =
+  (* An internal entity uses a "literal entity value" as character source.
+   * This value is first expanded and preprocessed, i.e. character and
+   * parameter references are expanded.
+   *
+   * 'the_p_internal_subset': indicates that the entity is declared in the
+   *   internal subset. Such entity declarations are not allowed to contain
+   *   references to parameter entities.
+   * 'init_is_parameter_entity': whether this is a parameter entity or not
+   *)
+
+  object (self)
+    inherit entity
+              the_dtd the_name the_warner init_errors_with_line_numbers
+              init_encoding
+            as super
+
+    val p_internal_subset = the_p_internal_subset
+
+    val mutable replacement_text = ""
+    val mutable contains_external_references = false
+    val mutable p_parsed_actually = false
+    val mutable is_open = false
+    val mutable state = At_beginning
+    val mutable is_parameter_entity = init_is_parameter_entity
+
+
+    initializer
+      (* Compute the replacement text once, when the object is created.
+       * General entity references are kept as references; character and
+       * parameter entity references are expanded. The pieces are collected
+       * in a buffer, in document order, by a tail-recursive loop.
+       *)
+      let lexbuf = Lexing.from_string the_literal_value in
+      let b = Buffer.create (String.length the_literal_value) in
+      let rec scan_and_expand () =
+        match lexerset.scan_dtd_string lexbuf with
+            ERef n ->
+              Buffer.add_string b ("&" ^ n ^ ";");
+              scan_and_expand()
+          | CRef(-1) ->
+              Buffer.add_string b "\r\n";
+              scan_and_expand()
+          | CRef(-2) ->
+              Buffer.add_string b "\r";
+              scan_and_expand()
+          | CRef(-3) ->
+              Buffer.add_string b "\n";
+              scan_and_expand()
+          | CRef k ->
+              Buffer.add_string b (character encoding warner k);
+              scan_and_expand()
+          | CharData x ->
+              Buffer.add_string b x;
+              scan_and_expand()
+          | PERef n ->
+              if p_internal_subset then
+                raise(WF_error("Restriction of the internal subset: parameter entity not allowed here"));
+              let en = dtd # par_entity n in
+              let (x, extref) = en # replacement_text in
+              contains_external_references <-
+                contains_external_references || extref;
+              Buffer.add_string b x;
+              scan_and_expand()
+          | Eof ->
+              ()
+          | _ ->
+              assert false
+      in
+      (* While the text is being expanded the entity counts as open, so
+       * that a reference of the entity to itself is detected by the
+       * method 'replacement_text'.
+       *)
+      is_open <- true;
+      scan_and_expand();
+      replacement_text <- Buffer.contents b;
+      is_open <- false;
+      normalize_newline <- false;
+      counts_as_external <- false;
+
+
+    method process_xmldecl (pl:prolog_token list) =
+      raise(Validation_error("The encoding cannot be changed in internal entities"))
+
+
+    method process_missing_xmldecl =
+      ()
+
+
+    method private set_encoding e =
+      (* Ignored if e = "" *)
+      assert(e = "");
+
+
+    method open_entity force_parsing init_lex_id =
+      (* Prepares the entity for being read token by token.
+       * 'force_parsing': whether Begin_entity/End_entity are to be
+       *   inserted around the tokens of the entity.
+       * 'init_lex_id': the lexer to start with.
+       * For parameter entities the replacement text is enclosed in one
+       * space character on each side.
+       * Raises Validation_error if the entity is already open.
+       *)
+      if is_open then
+        raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+
+      p_parsed_actually <- force_parsing;
+      lexbuf <- Lexing.from_string
+                  (if is_parameter_entity then
+                     (" " ^ replacement_text ^ " ")
+                   else
+                     replacement_text);
+      prolog <- None;
+      lex_id <- init_lex_id;
+      state <- At_beginning;
+      is_open <- true;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      (* A freshly opened entity is at the beginning of its stream; this
+       * is the same initial marker external_entity uses, so that the
+       * handle_bof hook below can take effect for the first token.
+       *)
+      last_token <- Bof;
+
+
+    method private handle_bof tok =
+      (* This hook is only called if the stream is not empty. *)
+      if p_parsed_actually then begin
+        deferred_token <- Some [ tok ];
+        state <- Inserted_begin_entity;
+        Begin_entity
+      end
+      else begin
+        state <- At_end;
+        tok
+      end
+
+
+    method private handle_eof =
+      (* This hook is called if the end of the stream is reached *)
+      match state with
+          At_beginning ->
+            (* This is only possible if the stream is empty. *)
+            if p_parsed_actually then begin
+              (* Insert Begin_entity / End_entity *)
+              deferred_token <- Some [ End_entity ];
+              state <- At_end;
+              Begin_entity;
+              (* After these two tokens have been processed, the lexer
+               * is called again, and it will return another Eof.
+               *)
+            end
+            else begin
+              (* Continue immediately with the next token *)
+              state <- At_end;
+              super # handle_eof
+            end
+        | Inserted_begin_entity ->
+            (* Insert End_entity, too. *)
+            state <- At_end;
+            End_entity;
+        | At_end ->
+            (* Continue with the next token: *)
+            super # handle_eof
+
+
+    method close_entity =
+      (* Marks the entity as closed and returns the current lexer id.
+       * Fails (Failure) if the entity is not open.
+       *)
+      if not is_open then
+        failwith ("Internal entity " ^ name ^ " not open");
+      is_open <- false;
+      lex_id
+
+
+    method replacement_text =
+      (* Returns the precomputed replacement text, and whether references
+       * to external entities were resolved while computing it.
+       * Raises Validation_error if the entity is currently open.
+       *)
+      if is_open then
+        raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      replacement_text, contains_external_references
+  end
+;;
+
+(**********************************************************************)
+
+(* An 'entity_manager' is a stack of entities, where the topmost entity
+ * is the currently active entity, the second entity is the entity that
+ * referred to the active entity, and so on.
+ *
+ * The entity_manager can communicate with the currently active entity.
+ *
+ * The entity_manager provides an interface for the parser; the functions
+ * returning the current token and the next token are exported.
+ *)
+
+class entity_manager (init_entity : entity) =
+  (* The manager keeps the currently active entity together with a stack
+   * of the entities that are suspended while the active one is being
+   * read. 'yy_get_next_ref' always holds a function that reads the next
+   * token of the active entity.
+   *)
+
+  (* The token reader for entity 'e' *)
+  let token_source (e : entity) = (fun () -> e # next_token) in
+
+  (* The full name of 'e', computed only on demand *)
+  let deferred_name (e : entity) = lazy (e # full_name) in
+
+  object (self)
+    val mutable entity_stack = Stack.create()
+    val mutable current_entity = init_entity
+    val mutable current_entity's_full_name = deferred_name init_entity
+
+    val mutable yy_get_next_ref = ref (fun () -> assert false)
+
+    initializer
+      let as_manager =
+        (self :> < current_entity : entity;
+                   pop_entity : unit;
+                   push_entity : entity -> unit >)
+      in
+      init_entity # set_manager as_manager;
+      yy_get_next_ref := token_source init_entity
+
+    method push_entity e =
+      (* Makes 'e' the active entity; the previously active entity and its
+       * lazily computed name are saved on the stack.
+       *)
+      let as_manager =
+        (self :> < current_entity : entity;
+                   pop_entity : unit;
+                   push_entity : entity -> unit >)
+      in
+      e # set_manager as_manager;
+      Stack.push (current_entity, current_entity's_full_name) entity_stack;
+      current_entity <- e;
+      current_entity's_full_name <- deferred_name e;
+      yy_get_next_ref := token_source e
+
+    method pop_entity =
+      (* Reactivates the entity saved last. May raise Stack.Empty *)
+      let (resumed, resumed_name) = Stack.pop entity_stack in
+      current_entity <- resumed;
+      current_entity's_full_name <- resumed_name;
+      yy_get_next_ref := token_source resumed
+
+
+    method position_string =
+      (* Gets a string describing the position of the last token;
+       * includes an entity backtrace
+       *)
+      let out = Buffer.create 200 in
+      let add_frame intro line_label full_name line_no column_no =
+        Buffer.add_string out
+          (intro ^ full_name ^ line_label ^ string_of_int line_no
+           ^ ", position " ^ string_of_int column_no ^ ":\n")
+      in
+      add_frame "In entity " ", at line "
+        (current_entity # full_name)
+        (current_entity # line)
+        (current_entity # column);
+      Stack.iter
+        (fun (caller, caller_name) ->
+           add_frame "Called from entity " ", line "
+             (Lazy.force caller_name)
+             (caller # line)
+             (caller # column))
+        entity_stack;
+      Buffer.contents out
+
+
+    method position =
+      (* Returns the triple (full_name, line, column) of the last token *)
+      ( Lazy.force current_entity's_full_name,
+        current_entity # line,
+        current_entity # column )
+
+
+    method current_entity_counts_as_external =
+      (* Whether the current entity counts as external to the main
+       * document for the purpose of stand-alone checks. This is the case
+       * if the active entity or any entity on the stack counts as
+       * external.
+       *)
+      (* TODO: improve performance *)
+      let found = ref false in
+      if current_entity # counts_as_external then found := true;
+      Stack.iter
+        (fun (e, _) -> if e # counts_as_external then found := true)
+        entity_stack;
+      !found
+
+
+    method current_entity = current_entity
+
+    method yy_get_next_ref = yy_get_next_ref
+
+  end
+;;
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/07/14 13:55:00 gerd
+ * Cosmetic changes.
+ *
+ * Revision 1.5 2000/07/09 17:51:50 gerd
+ * Entities return now the beginning of a token as its
+ * position.
+ * New method 'position' for entity_manager.
+ *
+ * Revision 1.4 2000/07/09 01:05:04 gerd
+ * Exported methods 'ext_id' and 'notation' anyway.
+ *
+ * Revision 1.3 2000/07/08 16:28:05 gerd
+ * Updated: Exception 'Not_resolvable' is taken into account.
+ *
+ * Revision 1.2 2000/07/04 22:12:47 gerd
+ * Update: Case ext_id = Anonymous.
+ * Update: Handling of the exception Not_competent when reading
+ * from a resolver.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_entity.ml:
+ *
+ * Revision 1.27 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.26 2000/05/28 17:24:55 gerd
+ * Bugfixes.
+ *
+ * Revision 1.25 2000/05/27 19:23:32 gerd
+ * The entities store whether they count as external with
+ * respect to the standalone check: New methods counts_as_external
+ * and set_counts_as_external.
+ * The entity manager can find out whether the current
+ * entity counts as external: method current_entity_counts_as_external.
+ *
+ * Revision 1.24 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.23 2000/05/14 21:51:24 gerd
+ * Change: Whitespace is handled by the grammar, and no longer
+ * by the entity.
+ *
+ * Revision 1.22 2000/05/14 17:50:54 gerd
+ * Updates because of changes in the token type.
+ *
+ * Revision 1.21 2000/05/09 00:02:44 gerd
+ * Conditional sections are now recognized by the parser.
+ * There seem to be some open questions; see the TODO comments!
+ *
+ * Revision 1.20 2000/05/08 21:58:22 gerd
+ * Introduced entity_manager as communication object between
+ * the parser and the currently active entity.
+ * New hooks handle_bof and handle_eof.
+ * Removed "delegated entities". The entity manager contains
+ * the stack of open entities.
+ * Changed the way Begin_entity and End_entity are inserted.
+ * This is now done by handle_bof and handle_eof.
+ * The XML declaration is no longer detected by the entity.
+ * This is now done by the parser.
+ *
+ * Revision 1.19 2000/05/01 15:18:44 gerd
+ * Improved CRLF handling in the replacement text of entities.
+ * Changed one error message.
+ *
+ * Revision 1.18 2000/04/30 18:18:39 gerd
+ * Bugfixes: The conversion of CR and CRLF to LF is now hopefully
+ * done right. The new variable "normalize_newline" indicates whether
+ * normalization must happen for that type of entity. The normalization
+ * is actually carried out separately for every token that needs it.
+ *
+ * Revision 1.17 2000/03/13 23:42:38 gerd
+ * Removed the resolver classes, and put them into their
+ * own module (Markup_reader).
+ *
+ * Revision 1.16 2000/02/22 01:06:58 gerd
+ * Bugfix: Resolvers are properly re-initialized. This bug caused
+ * that entities could not be referenced twice in the same document.
+ *
+ * Revision 1.15 2000/01/20 20:54:11 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.14 2000/01/08 18:59:03 gerd
+ * Corrected the string resolver.
+ *
+ * Revision 1.13 1999/09/01 22:58:23 gerd
+ * Method warn_not_latin1 raises Illegal_character if the character
+ * does not match the Char production.
+ * External entities that are not document entities check if the
+ * declaration at the beginning matches the TextDecl production.
+ * Method xml_declaration has type ... list option, not ... list.
+ * Tag_beg and Tag_end now carry an entity_id with them.
+ * The code to check empty entities has changed. That the Begin_entity/
+ * End_entity pair is not to be added must be explicitly turned on. See the
+ * description of empty entity handling in design.txt.
+ * In internal subsets entity declarations are not allowed to refer
+ * to parameter entities. The internal_entity class can do this now.
+ * The p_parsed parameter of internal_entity has gone. It was simply
+ * superfluous.
+ *
+ * Revision 1.12 1999/09/01 16:24:13 gerd
+ * The method replacement_text returns the text as described for
+ * "included in literal". The former behaviour has been dropped to include
+ * a leading and a trailing space character for parameter entities.
+ * Bugfix: When general entities are included, they are always parsed.
+ *
+ * Revision 1.11 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.10 1999/08/19 01:06:41 gerd
+ * Improved error messages: external entities print their
+ * ext id, too
+ *
+ * Revision 1.9 1999/08/15 20:35:48 gerd
+ * Improved error messages.
+ * Before the tokens Plus, Star, Qmark space is not allowed any longer.
+ * Detection of recursive entity references is a bit cleaner.
+ *
+ * Revision 1.8 1999/08/15 15:33:44 gerd
+ * Revised whitespace checking: At certain positions there must be
+ * white space. These checks cannot be part of the lexer, as %entity; counts
+ * as white space. They cannot be part of the yacc parser because one look-ahead
+ * token would not suffice if we did that. So these checks must be done by the
+ * entity layer. Luckily, the rules are simple: There are simply a number of
+ * token pairs between which white space must occur independently of where
+ * these token have been found. Two variables, "space_seen", and "last_token"
+ * have been added in order to check these rules.
+ *
+ * Revision 1.7 1999/08/15 00:41:06 gerd
+ * The [ token of conditional sections is now allowed to occur
+ * in a different entity.
+ *
+ * Revision 1.6 1999/08/15 00:29:02 gerd
+ * The method "attlist_replacement_text" has gone. There is now a
+ * more general "replacement_text" method that computes the replacement
+ * text for both internal and external entities. Additionally, this method
+ * returns whether references to external entities have been resolved;
+ * this is checked in the cases where formerly "attlist_replacement_text"
+ * was used as it is not allowed everywhere.
+ * Entities have a new slot "need_spaces" that indicates that the
+ * next token must be white space or a parameter reference. The problem
+ * was that "
+ (* The class without properties; but you can still compare if two objects
+ * are the same.
+ *)
+
+(* The tokens produced by the lexers.
+ *
+ * Constructors with an entity_id argument carry the ID of the entity in
+ * which the token was found; the parser compares the IDs of tokens that
+ * belong together (e.g. Decl_element and Decl_rangle).
+ *
+ * Every constructor matched by string_of_tok must be declared here.
+ *)
+type token =
+  | Begin_entity             (* Beginning of entity *)
+  | End_entity               (* End of entity *)
+  | Comment_begin            (* <!-- *)
+  | Comment_material of string (* within a comment *)
+  | Comment_end              (* --> *)
+  | Ignore                   (* ignored whitespace *)
+  | Eq                       (* = *)
+  | Rangle                   (* > as tag delimiter *)
+  | Rangle_empty             (* /> as tag delimiter *)
+  | Percent                  (* % followed by space in declaration *)
+  | Plus                     (* + in declaration *)
+  | Star                     (* * in declaration *)
+  | Bar                      (* | in declaration *)
+  | Comma                    (* , in declaration *)
+  | Qmark                    (* ? in declaration *)
+  | Pcdata                   (* #PCDATA in declaration *)
+  | Required                 (* #REQUIRED in declaration *)
+  | Implied                  (* #IMPLIED in declaration *)
+  | Fixed                    (* #FIXED in declaration *)
+  | Bof                      (* A marker for 'beginning of file' *)
+  | Eof                      (* End of file *)
+  | Conditional_begin of entity_id  (* <![ in declaration *)
+  | Conditional_body  of entity_id  (* [ in declaration *)
+  | Conditional_end   of entity_id  (* ]]> in declaration *)
+  | Doctype        of entity_id  (* <!DOCTYPE *)
+  | Doctype_rangle of entity_id  (* > as DOCTYPE delimiter *)
+  | Dtd_begin      of entity_id  (* '[' after DOCTYPE *)
+  | Dtd_end        of entity_id  (* ']' *)
+  | Decl_element   of entity_id  (* <!ELEMENT *)
+  | Decl_attlist   of entity_id  (* <!ATTLIST *)
+  | Decl_entity    of entity_id  (* <!ENTITY *)
+  | Decl_notation  of entity_id  (* <!NOTATION *)
+  | Decl_rangle    of entity_id  (* > *)
+  | Lparen         of entity_id  (* ( in declaration *)
+  | Rparen         of entity_id  (* ) in declaration *)
+  | RparenPlus     of entity_id  (* )+ in declaration *)
+  | RparenStar     of entity_id  (* )* in declaration *)
+  | RparenQmark    of entity_id  (* )? in declaration *)
+
+  | Tag_beg of (string*entity_id)     (* <name *)
+  | Tag_end of (string*entity_id)     (* </name *)
+  | PI        of (string*string)      (* <?name ... ?> *)
+  | PI_xml    of (prolog_token list)  (* <?xml ...?> *)
+  | Cdata     of string               (* <![CDATA[...]]> *)
+  | CRef      of int                  (* &#digits; *)
+  | ERef      of string               (* &name; *)
+  | PERef     of string               (* %name; *)
+  | CharData  of string             (* any characters not otherwise matching *)
+  | LineEnd   of string
+  | Name      of string               (* name *)
+  | Nametoken of string               (* nmtoken but not name *)
+  | Attval    of string           (* attribute value; may contain entity refs *)
+  | Attval_nl_normalized of string
+  | Unparsed_string      of string    (* "data" or 'data' *)
+
+
+(**********************************************************************)
+(* debugging *)
+
+(* Maps a token to the name of its constructor; the arguments of the
+ * token, if any, are not printed. Used for debugging output.
+ *)
+let string_of_tok = function
+  (* Tokens without argument *)
+  | Begin_entity           -> "Begin_entity"
+  | End_entity             -> "End_entity"
+  | Comment_begin          -> "Comment_begin"
+  | Comment_end            -> "Comment_end"
+  | Ignore                 -> "Ignore"
+  | Eq                     -> "Eq"
+  | Rangle                 -> "Rangle"
+  | Rangle_empty           -> "Rangle_empty"
+  | Percent                -> "Percent"
+  | Plus                   -> "Plus"
+  | Star                   -> "Star"
+  | Bar                    -> "Bar"
+  | Comma                  -> "Comma"
+  | Qmark                  -> "Qmark"
+  | Pcdata                 -> "Pcdata"
+  | Required               -> "Required"
+  | Implied                -> "Implied"
+  | Fixed                  -> "Fixed"
+  | Bof                    -> "Bof"
+  | Eof                    -> "Eof"
+  (* Document type and declaration tokens *)
+  | Conditional_begin _    -> "Conditional_begin"
+  | Conditional_body _     -> "Conditional_body"
+  | Conditional_end _      -> "Conditional_end"
+  | Doctype _              -> "Doctype"
+  | Doctype_rangle _       -> "Doctype_rangle"
+  | Dtd_begin _            -> "Dtd_begin"
+  | Dtd_end _              -> "Dtd_end"
+  | Decl_element _         -> "Decl_element"
+  | Decl_attlist _         -> "Decl_attlist"
+  | Decl_entity _          -> "Decl_entity"
+  | Decl_notation _        -> "Decl_notation"
+  | Decl_rangle _          -> "Decl_rangle"
+  | Lparen _               -> "Lparen"
+  | Rparen _               -> "Rparen"
+  | RparenPlus _           -> "RparenPlus"
+  | RparenStar _           -> "RparenStar"
+  | RparenQmark _          -> "RparenQmark"
+  (* Tags, processing instructions, references, and character material *)
+  | Tag_beg _              -> "Tag_beg"
+  | Tag_end _              -> "Tag_end"
+  | PI _                   -> "PI"
+  | PI_xml _               -> "PI_xml"
+  | Comment_material _     -> "Comment_material"
+  | Cdata _                -> "Cdata"
+  | CRef _                 -> "CRef"
+  | ERef _                 -> "ERef"
+  | PERef _                -> "PERef"
+  | CharData _             -> "CharData"
+  | LineEnd _              -> "LineEnd"
+  | Name _                 -> "Name"
+  | Nametoken _            -> "Nametoken"
+  | Attval _               -> "Attval"
+  | Attval_nl_normalized _ -> "Attval_nl_normalized"
+  | Unparsed_string _      -> "Unparsed_string"
+
+
+(* A lexer_set bundles the scanner functions that belong to one encoding
+ * of the internal representation ('lex_encoding').
+ *
+ * The first group of scanners returns a pair (token * lexers); there is
+ * one such scanner for every constructor of the type 'lexers'. The second
+ * component is presumably the lexer to be used for the following token
+ * (NOTE(review): confirm against the caller in Pxp_entity).
+ *
+ * The remaining scanners return a single token only. Pxp_entity uses
+ * 'scan_dtd_string' to expand the replacement text of entities, and
+ * 'scan_only_xml_decl' to check whether an external entity begins with
+ * an XML declaration.
+ *)
+type lexer_set =
+ { lex_encoding : Pxp_types.rep_encoding;
+ scan_document : Lexing.lexbuf -> (token * lexers);
+ scan_content : Lexing.lexbuf -> (token * lexers);
+ scan_within_tag : Lexing.lexbuf -> (token * lexers);
+ scan_document_type : Lexing.lexbuf -> (token * lexers);
+ scan_declaration : Lexing.lexbuf -> (token * lexers);
+ scan_content_comment : Lexing.lexbuf -> (token * lexers);
+ scan_decl_comment : Lexing.lexbuf -> (token * lexers);
+ scan_document_comment: Lexing.lexbuf -> (token * lexers);
+ scan_ignored_section : Lexing.lexbuf -> (token * lexers);
+ scan_xml_pi : Lexing.lexbuf -> prolog_token;
+ scan_dtd_string : Lexing.lexbuf -> token;
+ scan_content_string : Lexing.lexbuf -> token;
+ scan_name_string : Lexing.lexbuf -> token;
+ scan_only_xml_decl : Lexing.lexbuf -> token;
+ scan_for_crlf : Lexing.lexbuf -> token;
+ }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:14:31 gerd
+ * Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.ml:
+ *
+ * Revision 1.6 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.5 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.4 2000/05/14 17:45:36 gerd
+ * Bugfix.
+ *
+ * Revision 1.3 2000/05/14 17:35:12 gerd
+ * Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2 2000/05/08 21:59:06 gerd
+ * New token Bof (beginning of file).
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
+ *
+ * Revision 1.4 2000/04/30 18:19:04 gerd
+ * Added new tokens.
+ *
+ * Revision 1.3 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2 1999/08/10 21:35:08 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli b/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli
new file mode 100644
index 000000000..9e7c2d8a1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli
@@ -0,0 +1,188 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(* The lexical contexts.  There is one constructor per context scanner:
+ * the names correspond to the scan_* fields of the record type lexer_set
+ * declared further down in this interface, and each of those scanners
+ * returns a value of this type together with the token it produced.
+ *)
+type lexers =
+    Document
+  | Document_type
+  | Content
+  | Within_tag
+  | Declaration
+  | Content_comment
+  | Decl_comment
+  | Document_comment
+  | Ignored_section
+
+
+(* The tokens returned by the scanner scan_xml_pi (see lexer_set); the
+ * token PI_xml carries a list of them.
+ *)
+type prolog_token =
+    Pro_name of string
+  | Pro_eq                  (* "=" *)
+  | Pro_string of string    (* "..." or '...' *)
+  | Pro_eof
+
+type entity_id = < >
+  (* The class without properties; but you can still compare if two objects
+   * are the same.
+   * According to the history log at the end of this file, the lexer
+   * produces tokens carrying a dummy ID, the entity layer replaces the
+   * dummy by the ID of the current entity, and the parser checks that
+   * paired tokens carry the same ID (proper nesting of parameter entities).
+   *)
+
+(* The tokens produced by the scanners of a lexer_set.
+ *
+ * NOTE(review): several comments of this listing were empty or truncated
+ * in this copy, and the history log at the end of this file names
+ * constructors that do not appear here (Comment_material, Comment_end,
+ * Decl_rangle, and the _body and _end companions of Conditional_begin).
+ * Markup-like text was probably lost when this copy was produced; verify
+ * the listing against the upstream PXP sources before relying on it.
+ * The comments marked "presumably" below are reconstructions.
+ *)
+type token =
+  | Begin_entity             (* Beginning of entity *)
+  | End_entity               (* End of entity *)
+  | Comment_begin            (* presumably the opening delimiter of a comment *)
+  | Ignore                   (* ignored whitespace *)
+  | Eq                       (* = *)
+  | Rangle                   (* > as tag delimiter *)
+  | Rangle_empty             (* /> as tag delimiter *)
+  | Percent                  (* % followed by space in declaration *)
+  | Plus                     (* + in declaration *)
+  | Star                     (* * in declaration *)
+  | Bar                      (* | in declaration *)
+  | Comma                    (* , in declaration *)
+  | Qmark                    (* ? in declaration *)
+  | Pcdata                   (* #PCDATA in declaration *)
+  | Required                 (* #REQUIRED in declaration *)
+  | Implied                  (* #IMPLIED in declaration *)
+  | Fixed                    (* #FIXED in declaration *)
+  | Bof                      (* A marker for 'beginning of file' *)
+  | Eof                      (* End of file *)
+  | Conditional_begin of entity_id  (* presumably the opener of a conditional
+                                     * section in a declaration *)
+  | Doctype        of entity_id  (* presumably the start of the DOCTYPE
+                                  * declaration; the original comment is
+                                  * truncated *)
+  | Dtd_begin      of entity_id  (* '[' after DOCTYPE *)
+  | Dtd_end        of entity_id  (* ']' *)
+  | Decl_element   of entity_id  (* presumably the start of an ELEMENT
+                                  * declaration *)
+  | Lparen         of entity_id  (* ( in declaration *)
+  | Rparen         of entity_id  (* ) in declaration *)
+  | RparenPlus     of entity_id  (* )+ in declaration *)
+  | RparenStar     of entity_id  (* )* in declaration *)
+  | RparenQmark    of entity_id  (* )? in declaration *)
+
+  | Tag_beg of (string*entity_id)   (* start of a tag; the string is
+                                     * presumably the element name *)
+  | PI_xml of (prolog_token list)   (* carries the prolog tokens; presumably
+                                     * the XML declaration *)
+  | Cdata of string                 (* presumably the contents of a CDATA
+                                     * section *)
+  | CRef of int                     (* numeric character reference; the int
+                                     * is presumably the code point *)
+  | ERef of string                  (* &name; *)
+  | PERef of string                 (* %name; *)
+  | CharData of string              (* any characters not otherwise matching *)
+  | LineEnd of string
+  | Name of string                  (* name *)
+  | Nametoken of string             (* nmtoken but not name *)
+  | Attval of string          (* attribute value; may contain entity refs *)
+  | Attval_nl_normalized of string
+  | Unparsed_string of string       (* "data" or 'data' *)
+
+
+(* Maps a token to a string.  NOTE(review): the exact rendering is defined
+ * by the implementation, which is not visible from this interface.
+ *)
+val string_of_tok : token -> string
+
+
+(* A complete set of scanner functions for one internal encoding, which is
+ * recorded in lex_encoding.  The nine scanners from scan_document to
+ * scan_ignored_section correspond to the constructors of the type lexers
+ * and return a (token, lexers) pair.  The remaining scanners return a
+ * single token (scan_xml_pi: a single prolog_token).
+ *)
+type lexer_set =
+    { lex_encoding         : Pxp_types.rep_encoding;
+      scan_document        : Lexing.lexbuf -> (token * lexers);
+      scan_content         : Lexing.lexbuf -> (token * lexers);
+      scan_within_tag      : Lexing.lexbuf -> (token * lexers);
+      scan_document_type   : Lexing.lexbuf -> (token * lexers);
+      scan_declaration     : Lexing.lexbuf -> (token * lexers);
+      scan_content_comment : Lexing.lexbuf -> (token * lexers);
+      scan_decl_comment    : Lexing.lexbuf -> (token * lexers);
+      scan_document_comment: Lexing.lexbuf -> (token * lexers);
+      scan_ignored_section : Lexing.lexbuf -> (token * lexers);
+      scan_xml_pi          : Lexing.lexbuf -> prolog_token;
+      scan_dtd_string      : Lexing.lexbuf -> token;
+      scan_content_string  : Lexing.lexbuf -> token;
+      scan_name_string     : Lexing.lexbuf -> token;
+      scan_only_xml_decl   : Lexing.lexbuf -> token;
+      scan_for_crlf        : Lexing.lexbuf -> token;
+    }
+
+(* lexer_set: Every internal encoding has its own set of lexer functions *)
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:14:31 gerd
+ * Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.mli:
+ *
+ * Revision 1.5 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.4 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.3 2000/05/14 17:35:12 gerd
+ * Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2 2000/05/08 21:59:17 gerd
+ * New token Bof (beginning of file).
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.3 of markup_lexer_types_shadow.mli
+ *
+ * Revision 1.3 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2 1999/08/10 21:35:09 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexers.ml b/helm/DEVEL/pxp/pxp/pxp_lexers.ml
new file mode 100644
index 000000000..ce6e7b3f1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_lexers.ml
@@ -0,0 +1,90 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+
+(* The lexer functions for the internal encoding ISO-8859-1.  Every field
+ * is bound to the scanner of the same name in one of the
+ * Pxp_lex_*_iso88591 modules.
+ *)
+let lexer_set_iso88591 =
+  { lex_encoding = `Enc_iso88591;
+
+    (* Scanners living in a module of their own: *)
+    scan_document       = Pxp_lex_document_iso88591.scan_document;
+    scan_document_type  = Pxp_lex_document_type_iso88591.scan_document_type;
+    scan_content        = Pxp_lex_content_iso88591.scan_content;
+    scan_within_tag     = Pxp_lex_within_tag_iso88591.scan_within_tag;
+    scan_declaration    = Pxp_lex_declaration_iso88591.scan_declaration;
+    scan_dtd_string     = Pxp_lex_dtd_string_iso88591.scan_dtd_string;
+    scan_content_string = Pxp_lex_content_string_iso88591.scan_content_string;
+
+    (* Scanners taken from the name_string module: *)
+    scan_name_string     = Pxp_lex_name_string_iso88591.scan_name_string;
+    scan_ignored_section = Pxp_lex_name_string_iso88591.scan_ignored_section;
+
+    (* Scanners taken from the misc module: *)
+    scan_content_comment  = Pxp_lex_misc_iso88591.scan_content_comment;
+    scan_decl_comment     = Pxp_lex_misc_iso88591.scan_decl_comment;
+    scan_document_comment = Pxp_lex_misc_iso88591.scan_document_comment;
+    scan_xml_pi           = Pxp_lex_misc_iso88591.scan_xml_pi;
+    scan_only_xml_decl    = Pxp_lex_misc_iso88591.scan_only_xml_decl;
+    scan_for_crlf         = Pxp_lex_misc_iso88591.scan_for_crlf;
+  }
+;;
+
+
+(* The UTF-8 lexer set; None until init_utf8 has been called. *)
+let lexer_set_utf8 = ref None
+;;
+
+
+(* Installs ls as the lexer set that get_lexer_set returns for `Enc_utf8.
+ * According to the history log at the end of this file, the call is made
+ * by the module Pxp_utf8, so that programs can be linked without UTF-8
+ * support.
+ *)
+let init_utf8 ls =
+  lexer_set_utf8 := Some ls
+;;
+
+
+(* Returns the lexer set for the internal encoding enc.
+ * Fails (exception Failure) if enc is `Enc_utf8 and no UTF-8 lexer set has
+ * been installed with init_utf8, or if enc is any other encoding than
+ * `Enc_iso88591 and `Enc_utf8.
+ *)
+let get_lexer_set enc =
+  (* The UTF-8 set is looked up lazily: only when it is really requested. *)
+  let installed_utf8_set () =
+    match !lexer_set_utf8 with
+        Some ls -> ls
+      | None    -> failwith ("Pxp_lexers: UTF-8 lexers not initialized")
+  in
+  match enc with
+      `Enc_utf8     -> installed_utf8_set ()
+    | `Enc_iso88591 -> lexer_set_iso88591
+    | _ ->
+        failwith ("Pxp_lexers: This type of internal encoding is not supported")
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/23 00:09:44 gerd
+ * The UTF-8 lexer set is no longer initialized here. It is done
+ * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexers.mli b/helm/DEVEL/pxp/pxp/pxp_lexers.mli
new file mode 100644
index 000000000..d8eabf6a6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_lexers.mli
@@ -0,0 +1,51 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+
+val get_lexer_set : rep_encoding -> lexer_set
+  (* Return the set of lexer functions that is able to handle the passed
+   * encoding.
+   * Raises Failure if the encoding is `Enc_utf8 and init_utf8 has not been
+   * called yet, or if the encoding is neither `Enc_iso88591 nor `Enc_utf8.
+   *)
+
+val init_utf8 : lexer_set -> unit
+  (* Internally used: registers the lexer set that get_lexer_set returns
+   * for `Enc_utf8.
+   *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/23 00:09:44 gerd
+ * The UTF-8 lexer set is no longer initialized here. It is done
+ * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_reader.ml b/helm/DEVEL/pxp/pxp/pxp_reader.ml
new file mode 100644
index 000000000..83add26d5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_reader.ml
@@ -0,0 +1,730 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types;;
+exception Not_competent;;
+  (* Raised when a resolver does not know how to handle the external ID it
+   * was asked to open.  The class 'combine' reacts by trying the next
+   * resolver of its list.
+   *)
+exception Not_resolvable of exn;;
+  (* Raised when a resolver is competent for the ID but resolving it fails;
+   * the argument is the exception explaining the reason.
+   *)
+
+(* The interface implemented by all resolver classes of this module. *)
+class type resolver =
+  object
+    (* Both init_* methods must have been called before open_in is used;
+     * resolve_general # open_in asserts this.
+     *)
+    method init_rep_encoding : rep_encoding -> unit
+    method init_warner : collect_warnings -> unit
+    method rep_encoding : rep_encoding
+    (* open_in: opens the entity denoted by the external ID and returns a
+     * lexbuf reading from it.  The resolvers of this module signal with
+     * Not_competent that they cannot handle the ID.
+     *)
+    method open_in : ext_id -> Lexing.lexbuf
+    method close_in : unit
+    (* close_all: invokes close_in on the clones created by 'clone' *)
+    method close_all : unit
+    (* change_encoding: takes the name of an encoding; the empty string
+     * keeps the encoding found so far (see resolve_general).
+     *)
+    method change_encoding : string -> unit
+    method clone : resolver
+  end
+;;
+
+
+(* The common base class of the resolvers of this module.  It implements
+ * the conversion from the external encoding of the input to the internal
+ * encoding; subclasses supply the raw bytes:
+ *
+ * - init_in xid:            prepares reading the entity xid; may raise
+ *                           Not_competent, and may already set 'encoding'
+ *                           and 'encoding_requested'
+ * - next_string s ofs len:  stores up to len bytes into s at position ofs
+ *                           and returns the number of bytes; 0 means that
+ *                           the end of the input has been reached
+ * - close_in:               releases the input
+ *
+ * Subclasses are also expected to define 'clone' and to add every clone to
+ * the list 'clones', because close_all iterates over that list.
+ *)
+class virtual resolve_general
+ =
+  object (self)
+    val mutable internal_encoding = `Enc_utf8
+
+    val mutable encoding = `Enc_utf8
+      (* the encoding of the input currently read *)
+    val mutable encoding_requested = false
+      (* whether 'encoding' has been fixed by init_in or change_encoding;
+       * if false, the encoding is only autodetected
+       *)
+
+    val mutable warner = new drop_warnings
+
+    val mutable enc_initialized = false
+    val mutable wrn_initialized = false
+
+    val mutable clones = []
+
+    method init_rep_encoding e =
+      internal_encoding <- e;
+      enc_initialized <- true;
+
+    method init_warner w =
+      warner <- w;
+      wrn_initialized <- true;
+
+    method rep_encoding = (internal_encoding :> rep_encoding)
+
+(*
+    method clone =
+      ( {< encoding = `Enc_utf8;
+           encoding_requested = false;
+        >}
+        : # resolver :> resolver )
+*)
+
+    method private warn (k:int) =
+      (* Called if a character not representable has been found.
+       * k is the character code.
+       * Code points of the ranges accepted below only cause a warning;
+       * all other values raise WF_error.
+       *)
+      if k < 0xd800 || (k >= 0xe000 && k <= 0xfffd) ||
+         (k >= 0x10000 && k <= 0x10ffff) then begin
+        warner # warn ("Code point cannot be represented: " ^ string_of_int k);
+      end
+      else
+        raise (WF_error("Code point " ^ string_of_int k ^
+                        " outside the accepted range of code points"))
+
+
+    method private autodetect s =
+      (* Sets the slot 'encoding' from the first two bytes of s:
+       * - `Enc_utf16 if s begins with one of the two byte order marks
+       *   (bytes 254,255 or bytes 255,254)
+       * - `Enc_utf8 otherwise, and also if s is shorter than 4 bytes
+       *)
+      if String.length s < 4 then
+        encoding <- `Enc_utf8
+      else if String.sub s 0 2 = "\254\255" then
+        encoding <- `Enc_utf16
+          (* Note: Netconversion.recode will detect the big endianess, too *)
+      else if String.sub s 0 2 = "\255\254" then
+        encoding <- `Enc_utf16
+          (* Note: Netconversion.recode will detect the little endianess, too *)
+      else
+        encoding <- `Enc_utf8
+
+
+    method private virtual next_string : string -> int -> int -> int
+    method private virtual init_in : ext_id -> unit
+    method virtual close_in : unit
+
+    method close_all =
+      (* Only the clones are closed, not this object itself. *)
+      List.iter (fun r -> r # close_in) clones
+
+    method open_in xid =
+      (* Opens the entity xid and returns a lexbuf delivering its contents
+       * in the internal encoding.  Requires that init_rep_encoding and
+       * init_warner have been called.
+       *)
+      assert(enc_initialized && wrn_initialized);
+
+      encoding <- `Enc_utf8;
+      encoding_requested <- false;
+      self # init_in xid;         (* may raise Not_competent *)
+      (* init_in: may already set 'encoding' *)
+
+      (* The raw bytes are collected in 'buffer'; the first buffer_len
+       * bytes are valid.  buffer_end becomes true when next_string has
+       * signalled the end of the input.
+       *)
+      let buffer_max = 512 in
+      let buffer = String.make buffer_max ' ' in
+      let buffer_len = ref 0 in
+      let buffer_end = ref false in
+      let fillup () =
+        if not !buffer_end && !buffer_len < buffer_max then begin
+          let l =
+            self # next_string buffer !buffer_len (buffer_max - !buffer_len) in
+          if l = 0 then
+            buffer_end := true
+          else begin
+            buffer_len := !buffer_len + l
+          end
+        end
+      in
+      let consume n =
+        (* Drops the first n bytes of the buffer *)
+        let l = !buffer_len - n in
+        String.blit buffer n buffer 0 l;
+        buffer_len := l
+      in
+
+      fillup();
+      (* Note: the whole buffer is passed, including the blanks after the
+       * first buffer_len bytes.
+       *)
+      if not encoding_requested then self # autodetect buffer;
+
+      Lexing.from_function
+        (fun s n ->
+           (* TODO: if encoding = internal_encoding, it is possible to
+            * avoid copying buffer to s because s can be directly used
+            * as buffer.
+            *)
+
+           fillup();
+           if !buffer_len = 0 then
+             0
+           else begin
+             let m_in  = !buffer_len in
+             (* As long as no encoding has been requested, only one
+              * character is converted per call; presumably this makes a
+              * later change_encoding effective for the very next
+              * character.
+              *)
+             let m_max = if encoding_requested then n else 1 in
+             let n_in, n_out, encoding' =
+               if encoding = (internal_encoding : rep_encoding :> encoding) &&
+                  encoding_requested
+               then begin
+                 (* Special case encoding = internal_encoding *)
+                 String.blit buffer 0 s 0 m_in;
+                 m_in, m_in, encoding
+               end
+               else
+                 Netconversion.recode
+                   ~in_enc:encoding
+                   ~in_buf:buffer
+                   ~in_pos:0
+                   ~in_len:m_in
+                   ~out_enc:(internal_encoding : rep_encoding :> encoding)
+                   ~out_buf:s
+                   ~out_pos:0
+                   ~out_len:n
+                   ~max_chars:m_max
+                   ~subst:(fun k -> self # warn k; "")
+             in
+             if n_in = 0 then
+               (* An incomplete character at the end of the stream: *)
+               raise Netconversion.Malformed_code;
+               (* failwith "Badly encoded character"; *)
+             (* recode may return a refined encoding, remember it: *)
+             encoding <- encoding';
+             consume n_in;
+             assert(n_out <> 0);
+             n_out
+           end)
+
+    method change_encoding enc =
+      (* Sets the encoding of the input to the encoding named enc, unless
+       * an encoding has already been requested; in the latter case the
+       * call is ignored.  The empty string confirms the autodetected
+       * encoding.  Raises WF_error if enc denotes UTF-16 but the input has
+       * not been recognized as UTF-16.
+       *)
+      if not encoding_requested then begin
+        if enc <> "" then begin
+          match Netconversion.encoding_of_string enc with
+              `Enc_utf16 ->
+                (match encoding with
+                     (`Enc_utf16_le | `Enc_utf16_be) -> ()
+                   | `Enc_utf16 -> assert false
+                   | _ ->
+                       raise(WF_error "Encoding of data stream and encoding declaration mismatch")
+                )
+            | e ->
+                encoding <- e
+        end;
+        (* else: the autodetected encoding counts *)
+        encoding_requested <- true;
+      end;
+  end
+;;
+
+
+(* A resolver reading from channels.  ~channel_of_id maps the external ID
+ * to a pair (channel, optional encoding); it may raise Not_competent.
+ * If ~auto_close is true (the default), close_in also closes the channel.
+ *)
+class resolve_read_any_channel ?(auto_close=true) ~channel_of_id =
+  object (self)
+    inherit resolve_general as super
+
+    val f_open = channel_of_id
+    val mutable current_channel = None
+    val auto_close = auto_close
+
+    method private init_in (id:ext_id) =
+      (* At most one channel can be open per resolver object: *)
+      begin match current_channel with
+          Some _ -> failwith "Pxp_reader.resolve_read_any_channel # init_in"
+        | None   -> ()
+      end;
+      let ch, enc_opt = f_open id in        (* may raise Not_competent *)
+      ( match enc_opt with
+            Some enc ->
+              (* An encoding passed with the channel counts as requested *)
+              encoding <- enc;
+              encoding_requested <- true
+          | None ->
+              ()
+      );
+      current_channel <- Some ch
+
+    method private next_string s ofs len =
+      match current_channel with
+          Some ch -> input ch s ofs len
+        | None    -> failwith "Pxp_reader.resolve_read_any_channel # next_string"
+
+    method close_in =
+      (* Closing an already closed resolver is a no-op *)
+      match current_channel with
+          Some ch ->
+            if auto_close then close_in ch;
+            current_channel <- None
+        | None ->
+            ()
+
+    method clone =
+      (* The clone shares the opener function and is already initialized *)
+      let twin =
+        new resolve_read_any_channel ?auto_close:(Some auto_close) f_open in
+      twin # init_rep_encoding internal_encoding;
+      twin # init_warner warner;
+      clones <- twin :: clones;
+      (twin :> resolver)
+
+  end
+;;
+
+
+(* A resolver reading from the single, already opened channel ch.
+ * - is_stale: if true, the resolver refuses to open anything
+ * - ?id:      if passed, only this external ID is accepted
+ * - ?fixenc:  if passed, the encoding of the channel
+ * The inherited class needs a function returning the channel, but this
+ * function is a method of the object under construction.  The reference
+ * 'getchannel' breaks the cycle: it is filled in by the initializer.
+ *)
+class resolve_read_this_channel1 is_stale ?id ?fixenc ?auto_close ch =
+
+  let getchannel = ref (fun xid -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_channel
+              ?auto_close:auto_close
+              (fun xid -> !getchannel xid)
+              as super
+
+    val mutable is_stale = is_stale
+      (* The channel can only be read once. To avoid that the channel
+       * is opened several times, the flag 'is_stale' is set after the
+       * first time.
+       *)
+
+    val fixid = id
+    val fixenc = fixenc
+    val fixch = ch
+
+    initializer
+      getchannel := self # getchannel
+
+    method private getchannel xid =
+      (* Returns the channel and its encoding; raises Not_competent if a
+       * fixed ID has been passed and xid is a different ID.
+       *)
+      begin match fixid with
+          None -> ()
+        | Some bound_xid ->
+            if xid <> bound_xid then raise Not_competent
+      end;
+      ch, fixenc
+
+    method private init_in (id:ext_id) =
+      if is_stale then
+        raise Not_competent
+      else begin
+        super # init_in id;
+        is_stale <- true
+      end
+
+    method close_in =
+      (* NOTE(review): this override only forgets the channel; the channel
+       * is never closed here, whatever the value of auto_close is.
+       * Confirm that this is intended.
+       *)
+      current_channel <- None
+
+    method clone =
+      (* The clone gets the current value of is_stale: once this resolver
+       * has been opened, its clones refuse to open the channel again.
+       *)
+      let c = new resolve_read_this_channel1
+                is_stale
+                ?id:fixid ?fixenc:fixenc ?auto_close:(Some auto_close) fixch
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolver)
+
+  end
+;;
+
+
+(* The public variant of resolve_read_this_channel1: the resolver starts
+ * with is_stale = false, i.e. the channel has not been read yet.
+ *)
+class resolve_read_this_channel =
+  resolve_read_this_channel1 false
+;;
+
+
+(* A resolver reading from strings.  ~string_of_id maps the external ID to
+ * a pair (string, optional encoding); it may raise Not_competent.
+ *)
+class resolve_read_any_string ~string_of_id =
+  object (self)
+    inherit resolve_general as super
+
+    val f_open = string_of_id
+    val mutable current_string = None
+    val mutable current_pos    = 0
+      (* the position of the next byte of current_string to deliver *)
+
+    method private init_in (id:ext_id) =
+      (* At most one string can be open per resolver object: *)
+      begin match current_string with
+          Some _ -> failwith "Pxp_reader.resolve_read_any_string # init_in"
+        | None   -> ()
+      end;
+      let s, enc_opt = f_open id in        (* may raise Not_competent *)
+      ( match enc_opt with
+            Some enc ->
+              (* An encoding passed with the string counts as requested *)
+              encoding <- enc;
+              encoding_requested <- true
+          | None ->
+              ()
+      );
+      current_string <- Some s;
+      current_pos    <- 0
+
+    method private next_string s ofs len =
+      match current_string with
+          Some str ->
+            (* Deliver as much as requested, but not more than is left *)
+            let remaining = String.length str - current_pos in
+            let count = min len remaining in
+            String.blit str current_pos s ofs count;
+            current_pos <- current_pos + count;
+            count
+        | None ->
+            failwith "Pxp_reader.resolve_read_any_string # next_string"
+
+    method close_in =
+      match current_string with
+          Some _ -> current_string <- None
+        | None   -> ()
+
+    method clone =
+      (* The clone shares the opener function and is already initialized *)
+      let twin = new resolve_read_any_string f_open in
+      twin # init_rep_encoding internal_encoding;
+      twin # init_warner warner;
+      clones <- twin :: clones;
+      (twin :> resolver)
+  end
+;;
+
+
+(* A resolver reading from the single string str.
+ * - is_stale: if true, the resolver refuses to open anything
+ * - ?id:      if passed, only this external ID is accepted
+ * - ?fixenc:  if passed, the encoding of the string
+ * The inherited class needs a function returning the string, but this
+ * function is a method of the object under construction.  The reference
+ * 'getstring' breaks the cycle: it is filled in by the initializer.
+ *)
+class resolve_read_this_string1 is_stale ?id ?fixenc str =
+
+  let getstring = ref (fun xid -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_string (fun xid -> !getstring xid) as super
+
+    val is_stale = is_stale
+      (* For some reasons, it is not allowed to open a clone of the resolver
+       * a second time when the original resolver is already open.
+       *)
+
+    val fixid = id
+    val fixenc = fixenc
+    val fixstr = str
+
+    initializer
+      getstring := self # getstring
+
+    method private getstring xid =
+      (* Returns the string and its encoding; raises Not_competent if a
+       * fixed ID has been passed and xid is a different ID.
+       *)
+      begin match fixid with
+          None -> ()
+        | Some bound_xid ->
+            if xid <> bound_xid then raise Not_competent
+      end;
+      fixstr, fixenc
+
+
+    method private init_in (id:ext_id) =
+      if is_stale then
+        raise Not_competent
+      else
+        super # init_in id
+
+    method clone =
+      (* The clone is stale if this resolver is stale or currently open.
+       * The operator || replaces the deprecated keyword operator that
+       * was used here before; the meaning is the same.
+       *)
+      let c = new resolve_read_this_string1
+                (is_stale || current_string <> None)
+                ?id:fixid ?fixenc:fixenc fixstr
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolver)
+  end
+;;
+
+
+(* The public variant of resolve_read_this_string1: the resolver starts
+ * with is_stale = false.
+ *)
+class resolve_read_this_string =
+  resolve_read_this_string1 false
+;;
+
+
+(* A resolver for external IDs that can be mapped to URLs.
+ * - ?base_url:        the URL against which relative URLs are resolved
+ *                     (default: Neturl.null_url)
+ * - ~url_of_id:       maps the external ID to a URL, which may be
+ *                     relative; may raise Not_competent
+ * - ~channel_of_url:  opens the absolute URL and returns the channel and
+ *                     optionally its encoding; may raise Not_competent
+ * The absolute URL of the opened entity is stored in own_url, and clones
+ * get it as their base URL.
+ *)
+class resolve_read_url_channel
+  ?(base_url = Neturl.null_url)
+  ?auto_close
+  ~url_of_id
+  ~channel_of_url
+
+  : resolver
+  =
+
+  let getchannel = ref (fun xid -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_channel
+              ?auto_close:auto_close
+              (fun xid -> !getchannel xid)
+              as super
+
+    val base_url = base_url
+    val mutable own_url = Neturl.null_url
+      (* Neturl.null_url until an entity has been opened successfully *)
+
+    val url_of_id = url_of_id
+    val channel_of_url = channel_of_url
+
+
+    initializer
+      getchannel := self # getchannel
+
+    method private getchannel xid =
+      (* A Not_competent raised by url_of_id is passed to the caller
+       * unchanged because the call is outside of the try block below.
+       * Once the ID has been mapped to a URL, failures are reported as
+       * Not_resolvable instead.
+       *)
+      let rel_url = url_of_id xid in    (* may raise Not_competent *)
+
+      try
+        (* Now compute the absolute URL: *)
+        let abs_url = Neturl.apply_relative_url base_url rel_url in
+        (* may raise Malformed_URL *)
+
+        (* Simple check whether 'abs_url' is really absolute: *)
+        if not(Neturl.url_provides ~scheme:true abs_url)
+        then raise Not_competent;
+
+        own_url <- abs_url;
+        (* FIXME: Copy 'abs_url' ? *)
+
+        (* Get and return the channel: *)
+        channel_of_url abs_url            (* may raise Not_competent *)
+      with
+          Neturl.Malformed_URL -> raise (Not_resolvable Neturl.Malformed_URL)
+        | Not_competent        -> raise (Not_resolvable Not_found)
+
+    method clone =
+      (* Note: if no entity has been opened yet, own_url is still
+       * Neturl.null_url, and this value becomes the base URL of the clone.
+       *)
+      let c =
+        new resolve_read_url_channel
+          ?base_url:(Some own_url)
+          ?auto_close:(Some auto_close)
+          ~url_of_id:url_of_id
+          ~channel_of_url:channel_of_url
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolve_read_url_channel)
+  end
+;;
+
+
+(* How a part of a URL is treated by the URL syntax of resolve_as_file *)
+type spec = [ `Not_recognized | `Allowed | `Required ]
+
+(* A resolver interpreting external IDs as file URLs.  It is an instance
+ * of resolve_read_url_channel whose base URL is the current working
+ * directory.
+ * - ?file_prefix:     treatment of the scheme part (default: `Allowed)
+ * - ?host_prefix:     treatment of the host part (default: `Allowed)
+ * - ?system_encoding: the encoding into which file names are converted
+ *                     before the file is opened (default: `Enc_utf8)
+ * - ?url_of_id, ?channel_of_url: functions tried first; the file-based
+ *                     defaults are only used if they are missing or raise
+ *                     Not_competent
+ *)
+class resolve_as_file
+  ?(file_prefix = (`Allowed :> spec))
+  ?(host_prefix = (`Allowed :> spec))
+  ?(system_encoding = `Enc_utf8)
+  ?url_of_id:passed_url_of_id
+  ?channel_of_url:passed_channel_of_url
+  ()
+  =
+
+  (* The syntax of the URLs found in external IDs: *)
+  let url_syntax =
+    let enable_if =
+      function
+          `Not_recognized  -> Neturl.Url_part_not_recognized
+        | `Allowed         -> Neturl.Url_part_allowed
+        | `Required        -> Neturl.Url_part_required
+    in
+    { Neturl.null_url_syntax with
+        Neturl.url_enable_scheme = enable_if file_prefix;
+        Neturl.url_enable_host   = enable_if host_prefix;
+        Neturl.url_enable_path   = Neturl.Url_part_required;
+        Neturl.url_accepts_8bits = true;
+    }
+  in
+
+  (* The syntax of the base URL; the scheme is always required here: *)
+  let base_url_syntax =
+    { Neturl.null_url_syntax with
+        Neturl.url_enable_scheme = Neturl.Url_part_required;
+        Neturl.url_enable_host   = Neturl.Url_part_allowed;
+        Neturl.url_enable_path   = Neturl.Url_part_required;
+        Neturl.url_accepts_8bits = true;
+    }
+  in
+
+  (* The working directory at object creation time, as file URL: *)
+  let default_base_url =
+    Neturl.make_url
+      ~scheme: "file"
+      ~host:   ""
+      ~path:   (Neturl.split_path (Sys.getcwd() ^ "/"))
+      base_url_syntax
+  in
+
+  (* Maps the external ID to a URL.  Raises Not_competent for Anonymous
+   * IDs, for Public IDs with empty system name, for unparseable URLs, for
+   * schemes other than "file", and for hosts other than "" and
+   * "localhost".
+   *)
+  let file_url_of_id xid =
+    let file_url_of_sysname sysname =
+      (* By convention, we can assume that sysname is a URL conforming
+       * to RFC 1738 with the exception that it may contain non-ASCII
+       * UTF-8 characters.
+       *)
+      try
+        Neturl.url_of_string url_syntax sysname
+           (* may raise Malformed_URL *)
+      with
+          Neturl.Malformed_URL -> raise Not_competent
+    in
+    let url =
+      match xid with
+          Anonymous          -> raise Not_competent
+        | Public (_,sysname) -> if sysname <> "" then file_url_of_sysname sysname
+                                                 else raise Not_competent
+        | System sysname     -> file_url_of_sysname sysname
+    in
+    (* A missing scheme or host is treated like "file" and "": *)
+    let scheme =
+      try Neturl.url_scheme url with Not_found -> "file" in
+    let host =
+      try Neturl.url_host url with Not_found -> "" in
+
+    if scheme <> "file" then raise Not_competent;
+    if host <> "" && host <> "localhost" then raise Not_competent;
+
+    url
+  in
+
+  (* Opens the file denoted by the path of the URL in binary mode.  No
+   * encoding is returned, i.e. the encoding of the file is autodetected.
+   * NOTE(review): a Sys_error raised by open_in_bin is not caught here
+   * and not translated into Not_resolvable either; confirm that callers
+   * expect this.
+   *)
+  let channel_of_file_url url =
+    try
+      let path_utf8 =
+        try Neturl.join_path (Neturl.url_path ~encoded:false url)
+        with Not_found -> raise Not_competent
+      in
+
+      let path =
+        Netconversion.recode_string
+          ~in_enc:  `Enc_utf8
+          ~out_enc: system_encoding
+          path_utf8 in
+        (* May raise Netconversion.Malformed_code, see the handler below *)
+
+      open_in_bin path, None
+        (* May raise Sys_error *)
+
+    with
+      | Netconversion.Malformed_code -> assert false
+          (* should not happen *)
+
+  in
+
+  (* The passed functions have precedence over the file-based ones: *)
+  let url_of_id id =
+    match passed_url_of_id with
+        None ->
+          file_url_of_id id
+      | Some f ->
+          begin
+            try f id
+            with
+                Not_competent -> file_url_of_id id
+          end
+  in
+
+  let channel_of_url url =
+    match passed_channel_of_url with
+        None ->
+          channel_of_file_url url
+      | Some f ->
+          begin
+            try f url
+            with
+                Not_competent -> channel_of_file_url url
+          end
+  in
+
+  resolve_read_url_channel
+    ~base_url:       default_base_url
+    ~auto_close:     true
+    ~url_of_id:      url_of_id
+    ~channel_of_url: channel_of_url
+;;
+
+
+(* A resolver delegating to the first competent resolver of the list rl.
+ * If ~prefer is passed, that resolver is tried before the members of the
+ * list.  The resolver that opened the entity is remembered as the active
+ * resolver until close_in is called.
+ *)
+class combine ?prefer rl =
+  object (self)
+    val prefered_resolver = prefer
+    val resolvers = (rl : resolver list)
+    val mutable internal_encoding = `Enc_utf8
+    val mutable warner = new drop_warnings
+    val mutable active_resolver = None
+    val mutable clones = []
+
+    method init_rep_encoding enc =
+      (* Initialize the members first, then remember the value *)
+      List.iter (fun member -> member # init_rep_encoding enc) rl;
+      internal_encoding <- enc
+
+    method init_warner w =
+      List.iter (fun member -> member # init_warner w) rl;
+      warner <- w
+
+    method rep_encoding = internal_encoding
+      (* CAUTION: This may not be the truth! *)
+
+    method open_in xid =
+      (* try_each: returns the first resolver not raising Not_competent
+       * together with the lexbuf it has opened
+       *)
+      let rec try_each candidates =
+        match candidates with
+            [] ->
+              raise Not_competent
+          | cand :: rest ->
+              ( try
+                  cand, (cand # open_in xid)
+                with
+                    Not_competent -> try_each rest
+              )
+      in
+
+      begin match active_resolver with
+          Some _ -> failwith "Pxp_reader.combine # open_in"
+        | None   -> ()
+      end;
+      let candidates =
+        match prefered_resolver with
+            Some p -> p :: resolvers
+          | None   -> resolvers
+      in
+      let chosen, lexbuf = try_each candidates in
+      active_resolver <- Some chosen;
+      lexbuf
+
+    method close_in =
+      match active_resolver with
+          Some r -> r # close_in;
+                    active_resolver <- None
+        | None   -> ()
+
+    method close_all =
+      List.iter (fun twin -> twin # close_in) clones
+
+    method change_encoding (enc:string) =
+      (* Only meaningful while an entity is open *)
+      match active_resolver with
+          Some r -> r # change_encoding enc
+        | None   -> failwith "Pxp_reader.combine # change_encoding"
+
+    method clone =
+      (* All members are cloned.  The clone of the active resolver, if
+       * any, becomes the preferred resolver of the new object.
+       *)
+      let c =
+        match active_resolver with
+            Some act ->
+              let act' = act # clone in
+              let members' =
+                List.map
+                  (fun q -> if q == act then act' else q # clone)
+                  resolvers
+              in
+              new combine ?prefer:(Some act') members'
+          | None ->
+              new combine ?prefer:None (List.map (fun q -> q # clone) resolvers)
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      c
+  end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.9 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.8 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.7 2000/07/09 15:32:01 gerd
+ * Fix in resolve_this_channel, resolve_this_string
+ *
+ * Revision 1.6 2000/07/09 01:05:33 gerd
+ * New method 'close_all' that closes the clones, too.
+ *
+ * Revision 1.5 2000/07/08 16:24:56 gerd
+ * Introduced the exception 'Not_resolvable' to indicate that
+ * 'combine' should not try the next resolver of the list.
+ *
+ * Revision 1.4 2000/07/06 23:04:46 gerd
+ * Quick fix for 'combine': The active resolver is "prefered",
+ * but the other resolvers are also used.
+ *
+ * Revision 1.3 2000/07/06 21:43:45 gerd
+ * Fix: Public(_,name) is now treated as System(name) if
+ * name is non-empty.
+ *
+ * Revision 1.2 2000/07/04 22:13:30 gerd
+ * Implemented the new API rev. 1.2 of pxp_reader.mli.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_reader.ml:
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/03/13 23:41:44 gerd
+ * Initial revision; this code was formerly part of Markup_entity.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_reader.mli b/helm/DEVEL/pxp/pxp/pxp_reader.mli
new file mode 100644
index 000000000..27a3680ec
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_reader.mli
@@ -0,0 +1,388 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types;;
+
+exception Not_competent;;
+ (* Raised by the 'open_in' method if the object does not know how to
+ * handle the passed external ID.
+ *)
+
+exception Not_resolvable of exn;;
+ (* Indicates that one resolver was competent, but there was an error
+ * while resolving the external ID. The passed exception explains the
+ * reason.
+ * Not_resolvable(Not_found) serves as indicator for an unknown reason.
+ *)
+
+
+(* The class type 'resolver' is the official type of all "resolvers".
+ * Resolvers take file names (or better, external identifiers) and
+ * return lexbufs, scanning the file for tokens. Resolvers may be
+ * cloned, and clones can interpret relative file names relative to
+ * their creator.
+ *
+ * Example of the latter:
+ *
+ * Resolver r reads from file:/dir/f1.xml
+ *
+ * some XML text
+ * &e; -----> Entity e is bound to "subdir/f2.xml"
+ * Step (1): let r' = "clone of r"
+ * Step (2): open file "subdir/f2.xml"
+ *
+ * r' must still know the directory of the file r is reading, otherwise
+ * it would not be able to resolve "subdir/f2.xml" = "file:/dir/subdir/f2.xml".
+ *
+ * Actually, this example can be coded as:
+ *
+ * let r = new resolve_as_file () in
+ * let lbuf = r # open_in (System "file:/dir/f1.xml") in
+ * ... read from lbuf ...
+ * let r' = r # clone in
+ * let lbuf' = r' # open_in (System "subdir/f2.xml") in
+ * ... read from lbuf' ...
+ * r' # close_in;
+ * ... read from lbuf ...
+ * r # close_in;
+ *)
+
+class type resolver =
+ object
+ (* A resolver can open an input source, and returns this source as
+ * Lexing.lexbuf.
+ *
+ * After creating a resolver, one must invoke the two methods
+ * init_rep_encoding and init_warner to set the internal encoding of
+ * strings and the warner object, respectively. This is normally
+ * done by the parsing functions in Pxp_yacc.
+ * It is not necessary to invoke these two methods for a fresh
+ * clone.
+ *
+ * It is possible that the character encoding of the source and the
+ * internal encoding of the parser are different. To cope with this,
+ * one of the tasks of the resolver is to recode the characters of
+ * the input source into the internal character encoding.
+ *
+ * Note that there are several ways of determining the encoding of the
+ * input: (1) It is possible that the transport protocol (e.g. HTTP)
+ * transmits the encoding, and (2) it is possible to inspect the beginning
+ * of the file, and to analyze:
+ * (2.1) The first two bytes indicate whether UTF-16 is used
+ * (2.2) Otherwise, one can assume that an ASCII-compatible character
+ * set is used. It is now possible to read the XML declaration
+ * <?xml ... encoding="xyz"?>. The encoding found here is
+ * to be used.
+ * (2.3) If the XML declaration is missing, the encoding is UTF-8.
+ * The resolver needs only to distinguish between cases (1), (2.1),
+ * and the rest.
+ * The details of analyzing whether (2.2) or (2.3) applies are programmed
+ * elsewhere, and the resolver will be told the result (see below).
+ *
+ * A resolver is like a file: it must be opened before one can work
+ * with it, and it should be closed after all operations on it have been
+ * done. The method 'open_in' is called with the external ID as argument
+ * and it must return the lexbuf reading from the external resource.
+ * The method 'close_in' does not require an argument.
+ *
+ * It is allowed to re-open a resolver after it has been closed. It is
+ * forbidden to open a resolver again while it is open.
+ * It is allowed to close a resolver several times: If 'close_in' is
+ * invoked while the resolver is already closed, nothing happens.
+ *
+ * The method 'open_in' may raise Not_competent to indicate that this
+ * resolver is not able to open this type of IDs.
+ *
+ * The method 'change_encoding' is called from the parser after the
+ * analysis of case (2) has been done; the argument is either the
+ * string name of the encoding, or the empty string to indicate
+ * that no XML declaration was found. It is guaranteed that
+ * 'change_encoding' is invoked after only a few tokens of the
+ * file. The resolver should react as follows:
+ * - If case (1) applies: Ignore the encoding passed to 'change_encoding'.
+ * - If case (2.1) applies: The encoding passed to 'change_encoding' must
+ * be compatible with UTF-16. This should be
+ * checked, and violations should be reported.
+ * - Else: If the passed encoding is "", assume UTF-8.
+ * Otherwise, assume the passed encoding.
+ *
+ * The following rule helps synchronizing the lexbuf with the encoding:
+ * If the resolver has been opened, but 'change_encoding' has not yet
+ * been invoked, the lexbuf contains at most one character (which may
+ * be represented by multiple bytes); i.e. the lexbuf is created by
+ * Lexing.from_function, and the function puts only one character into
+ * the buffer at once.
+ * After 'change_encoding' has been invoked, there is no longer a limit
+ * on the lexbuf size.
+ *
+ * The reason for this rule is that you know exactly the character where
+ * the encoding changes to the encoding passed by 'change_encoding'.
+ *
+ * The method 'clone' may be invoked for open or closed resolvers.
+ * Basically, 'clone' returns a new resolver which is always closed.
+ * If the original resolver is closed, the clone is simply a clone.
+ * If the original resolver is open at the moment of cloning:
+ * If the clone is later opened for a relative system ID (i.e. relative
+ * URL), the clone must interpret this ID relative to the ID of the
+ * original resolver.
+ *)
+ method init_rep_encoding : rep_encoding -> unit (* sets the internal encoding of strings *)
+ method init_warner : collect_warnings -> unit (* sets the warner object *)
+
+ method rep_encoding : rep_encoding (* presumably the encoding set by init_rep_encoding -- confirm *)
+
+ method open_in : ext_id -> Lexing.lexbuf
+ (* May raise Not_competent if the object does not know how to handle
+ * this ext_id.
+ *)
+ method close_in : unit (* closing an already closed resolver does nothing *)
+ method change_encoding : string -> unit (* encoding name, or "" if no XML declaration was found *)
+
+
+ (* Every resolver can be cloned. The clone does not inherit the connection
+ * with the external object, i.e. it is initially closed.
+ *)
+ method clone : resolver
+
+ method close_all : unit
+ (* Closes this resolver and every clone *)
+
+ end
+;;
+
+(* Note: resolve_general is no longer exported. In most cases, the classes
+ * resolve_read_any_channel or resolve_read_any_string are applicable, too,
+ * and much easier to configure.
+ *)
+
+
+(* The next classes are resolvers for concrete input sources. *)
+
+class resolve_read_this_channel :
+ ?id:ext_id -> ?fixenc:encoding -> ?auto_close:bool ->
+ in_channel -> resolver;;
+
+ (* Reads from the passed channel (it may be even a pipe). If the ~id
+ * argument is passed to the object, the created resolver accepts only
+ * this ID. Otherwise all IDs are accepted.
+ * Once the resolver has been cloned, it does not accept any ID. This
+ * means that this resolver cannot handle inner references to external
+ * entities. Note that you can combine this resolver with another resolver
+ * that can handle inner references (such as resolve_as_file); see
+ * class 'combine' below.
+ * If you pass the ~fixenc argument, the encoding of the channel is
+ * set to the passed value, regardless of any auto-recognition or
+ * any XML declaration.
+ * If ?auto_close = true (which is the default), the channel is
+ * closed after use. If ?auto_close = false, the channel is left open.
+ *)
+
+
+class resolve_read_any_channel :
+ ?auto_close:bool ->
+ channel_of_id:(ext_id -> (in_channel * encoding option)) ->
+ resolver;;
+
+ (* resolve_read_any_channel f_open:
+ * This resolver calls the function f_open to open a new channel for
+ * the passed ext_id. This function must either return the channel and
+ * the encoding, or it must fail with Not_competent.
+ * The function must return None as encoding if the default mechanism to
+ * recognize the encoding should be used. It must return Some e if it is
+ * already known that the encoding of the channel is e.
+ * If ?auto_close = true (which is the default), the channel is
+ * closed after use. If ?auto_close = false, the channel is left open.
+ *)
+
+
+class resolve_read_url_channel :
+ ?base_url:Neturl.url ->
+ ?auto_close:bool ->
+ url_of_id:(ext_id -> Neturl.url) ->
+ channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
+ resolver;;
+
+ (* resolve_read_url_channel url_of_id channel_of_url:
+ *
+ * When this resolver gets an ID to read from, it calls the function
+ * ~url_of_id to get the corresponding URL. This URL may be a relative
+ * URL; however, a URL scheme must be used which contains a path.
+ * The resolver converts the URL to an absolute URL if necessary.
+ * The second function, ~channel_of_url, is fed with the absolute URL
+ * as input. This function opens the resource to read from, and returns
+ * the channel and the encoding of the resource.
+ *
+ * Both functions, ~url_of_id and ~channel_of_url, can raise
+ * Not_competent to indicate that the object is not able to read from
+ * the specified resource. However, there is a difference: A Not_competent
+ * from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url
+ * is converted to Not_resolvable. So only ~url_of_id decides which URLs
+ * are accepted by the resolver and which not.
+ *
+ * The function ~channel_of_url must return None as encoding if the default
+ * mechanism to recognize the encoding should be used. It must return
+ * Some e if it is already known that the encoding of the channel is e.
+ *
+ * If ?auto_close = true (which is the default), the channel is
+ * closed after use. If ?auto_close = false, the channel is left open.
+ *
+ * Objects of this class contain a base URL relative to which relative
+ * URLs are interpreted. When creating a new object, you can specify
+ * the base URL by passing it as ~base_url argument. When an existing
+ * object is cloned, the base URL of the clone is the URL of the original
+ * object.
+ *
+ * Note that the term "base URL" has a strict definition in RFC 1808.
+ *)
+
+
+class resolve_read_this_string :
+ ?id:ext_id -> ?fixenc:encoding -> string -> resolver;;
+
+ (* Reads from the passed string. If the ~id
+ * argument is passed to the object, the created resolver accepts only
+ * this ID. Otherwise all IDs are accepted.
+ * Once the resolver has been cloned, it does not accept any ID. This
+ * means that this resolver cannot handle inner references to external
+ * entities. Note that you can combine this resolver with another resolver
+ * that can handle inner references (such as resolve_as_file); see
+ * class 'combine' below.
+ * If you pass the ~fixenc argument, the encoding of the string is
+ * set to the passed value, regardless of any auto-recognition or
+ * any XML declaration.
+ *)
+
+
+class resolve_read_any_string :
+ string_of_id:(ext_id -> (string * encoding option)) -> resolver;;
+
+ (* resolve_read_any_string f_open:
+ * This resolver calls the function f_open to get the string for
+ * the passed ext_id. This function must either return the string and
+ * the encoding, or it must fail with Not_competent.
+ * The function must return None as encoding if the default mechanism to
+ * recognize the encoding should be used. It must return Some e if it is
+ * already known that the encoding of the string is e.
+ *)
+
+
+class resolve_as_file :
+ ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?system_encoding:encoding ->
+ ?url_of_id:(ext_id -> Neturl.url) ->
+ ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+ unit ->
+ resolver;;
+
+ (* Reads from the local file system. Every file name is interpreted as
+ * file name of the local file system, and the referred file is read.
+ *
+ * The full form of a file URL is: file://host/path, where
+ * 'host' specifies the host system where the file identified by 'path'
+ * resides. host = "" or host = "localhost" are accepted; other values
+ * will raise Not_competent. The standard for file URLs is
+ * defined in RFC 1738.
+ *
+ * Option ~file_prefix: Specifies how the "file:" prefix of file names
+ * is handled:
+ * `Not_recognized: The prefix is not recognized.
+ * `Allowed: The prefix is allowed but not required (the default).
+ * `Required: The prefix is required.
+ *
+ * Option ~host_prefix: Specifies how the "//host" phrase of file names
+ * is handled:
+ * `Not_recognized: The phrase is not recognized.
+ * `Allowed: The phrase is allowed but not required (the default).
+ * `Required: The phrase is required.
+ *
+ * Option ~system_encoding: Specifies the encoding of file names of
+ * the local file system. Default: UTF-8.
+ *
+ * Options ~url_of_id, ~channel_of_url: Not for the end user!
+ *)
+
+
+class combine : ?prefer:resolver -> resolver list -> resolver;;
+
+ (* Combines several resolver objects. If a concrete entity with an
+ * ext_id is to be opened, the combined resolver tries the contained
+ * resolvers in turn until a resolver accepts opening the entity
+ * (i.e. it does not raise Not_competent on open_in).
+ *
+ * Clones: If the 'clone' method is invoked before 'open_in', all contained
+ * resolvers are cloned and again combined. If the 'clone' method is
+ * invoked after 'open_in' (i.e. while the resolver is open), only the
+ * active resolver is cloned.
+ *)
+
+(* EXAMPLES OF RESOLVERS:
+ *
+ * let r1 = new resolve_as_file ()
+ * - r1 can open all local files
+ *
+ * let r2 = new resolve_read_this_channel
+ * ~id:(System "file:/dir/f.xml")
+ * (open_in "/dir/f.xml")
+ * - r2 can only read /dir/f.xml of the local file system. If this file
+ * contains references to other files, r2 will fail
+ *
+ * let r3 = new combine [ r2; r1 ]
+ * - r3 reads /dir/f.xml of the local file system by calling r2, and all
+ * other files by calling r1
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/07/09 01:05:33 gerd
+ * New methode 'close_all' that closes the clones, too.
+ *
+ * Revision 1.4 2000/07/08 16:24:56 gerd
+ * Introduced the exception 'Not_resolvable' to indicate that
+ * 'combine' should not try the next resolver of the list.
+ *
+ * Revision 1.3 2000/07/06 23:04:46 gerd
+ * Quick fix for 'combine': The active resolver is "prefered",
+ * but the other resolvers are also used.
+ *
+ * Revision 1.2 2000/07/04 22:06:49 gerd
+ * MAJOR CHANGE: Complete redesign of the reader classes.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_reader.mli:
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/03/13 23:41:54 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_types.ml b/helm/DEVEL/pxp/pxp/pxp_types.ml
new file mode 100644
index 000000000..e8a8eac97
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_types.ml
@@ -0,0 +1,212 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+type ext_id = (* external identifier of an entity *)
+ System of string (* system identifier: a file name or URL *)
+ | Public of (string * string) (* (public id, fallback system id or "") *)
+ | Anonymous (* the external ID is not known *)
+
+
+type dtd_id = (* where a DTD comes from *)
+ External of ext_id (* DTD is completely external *)
+ | Derived of ext_id (* DTD is derived from an external DTD *)
+ | Internal (* DTD is completely internal *)
+;;
+
+type content_model_type = (* content model of an element type *)
+ Unspecified (* a specification of the model has not yet been found *)
+ | Empty (* nothing is allowed as content *)
+ | Any (* everything is allowed as content *)
+ | Mixed of mixed_spec list (* elements and PCDATA in arbitrary order *)
+ | Regexp of regexp_spec (* elements following this regular expression *)
+
+and mixed_spec = (* one alternative of a Mixed model *)
+ MPCDATA (* PCDATA children are allowed *)
+ | MChild of string (* this kind of element is allowed *)
+
+and regexp_spec = (* regular expression over child elements *)
+ Optional of regexp_spec (* subexpression? *)
+ | Repeated of regexp_spec (* subexpression* *)
+ | Repeated1 of regexp_spec (* subexpression+ *)
+ | Alt of regexp_spec list (* subexpr1 | subexpr2 | ... | subexprN *)
+ | Seq of regexp_spec list (* subexpr1 , subexpr2 , ... , subexprN *)
+ | Child of string (* this kind of element is allowed here *)
+;;
+
+
+type att_type = (* declared type of an attribute *)
+ A_cdata (* CDATA *)
+ | A_id (* ID *)
+ | A_idref (* IDREF *)
+ | A_idrefs (* IDREFS *)
+ | A_entity (* ENTITY *)
+ | A_entities (* ENTITIES *)
+ | A_nmtoken (* NMTOKEN *)
+ | A_nmtokens (* NMTOKENS *)
+ | A_notation of string list (* NOTATION (name1 | name2 | ... | nameN) *)
+ | A_enum of string list (* (name1 | name2 | ... | nameN) *)
+;;
+
+
+type att_default = (* default declaration of an attribute *)
+ D_required (* #REQUIRED *)
+ | D_implied (* #IMPLIED *)
+ | D_default of string (* <value> -- the default value is already expanded *)
+ | D_fixed of string (* #FIXED <value> -- the default value is already expanded *)
+;;
+
+
+type att_value = (* value of an attribute *)
+ Value of string (* a single value *)
+ | Valuelist of string list (* a list of values *)
+ | Implied_value (* a value left out *)
+;;
+
+
+class type collect_warnings = (* interface of "warner" objects *)
+ object
+ method warn : string -> unit (* called with the text of one warning *)
+ end
+;;
+
+
+class drop_warnings = (* a warner that ignores every warning *)
+ object
+ method warn (w:string) = () (* the message is discarded *)
+ end
+;;
+
+
+type encoding = Netconversion.encoding;; (* every encoding defined in Netconversion (package netstring) *)
+
+type rep_encoding =
+ (* The subset of 'encoding' that may be used for internal representation
+ * of strings. Both encodings are ASCII-compatible (see pxp_types.mli).
+ *)
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ ]
+;;
+
+
+exception Validation_error of string (* violation of a validity constraint *)
+
+exception WF_error of string (* violation of a well-formedness constraint *)
+
+exception Error of string (* other error *)
+
+exception Character_not_supported (* string_of_exn reports this as a RESTRICTION *)
+
+exception At of (string * exn) (* (description of where the exn happened, the exn; may again be At) *)
+
+exception Undeclared (* no declaration is available, so every kind of usage is allowed *)
+
+
+let rec string_of_exn x0 = (* renders the exception x0 as a readable message string *)
+ match x0 with
+ At (s, x) -> (* location text s, followed by the rendering of the wrapped exception *)
+ s ^ string_of_exn x
+ | Validation_error s -> (* s is appended to a fixed category prefix *)
+ "ERROR (Validity constraint): " ^ s
+ | WF_error s ->
+ "ERROR (Well-formedness constraint): " ^ s
+ | Error s ->
+ "ERROR: " ^ s
+ | Character_not_supported -> (* constant message, the exception has no argument *)
+ "RESTRICTION: Character not supported"
+ | Netconversion.Malformed_code -> (* exception defined by the Netconversion module *)
+ "ERROR: Bad character stream"
+ | Undeclared ->
+ "INFORMATION: Undeclared"
+ | Parsing.Parse_error -> (* exception of the standard Parsing module *)
+ "SYNTAX ERROR"
+ | _ -> (* any other exception: fall back to Printexc.to_string *)
+ "Other exception: " ^ Printexc.to_string x0
+;;
+
+
+type output_stream = (* destination accepted by 'write' *)
+ Out_buffer of Buffer.t (* text is appended to the buffer *)
+ | Out_channel of out_channel (* text is written to the channel *)
+ | Out_function of (string -> int -> int -> unit) (* called as f str pos len *)
+;;
+
+
+let write os str pos len = (* sends the len characters of str starting at pos to os *)
+ match os with
+ Out_buffer b -> Buffer.add_substring b str pos len (* append to the buffer *)
+ | Out_channel ch -> output ch str pos len (* write to the channel; no flush is done here *)
+ | Out_function f -> f str pos len (* delegate to the user-supplied function *)
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.6 2000/07/27 00:41:15 gerd
+ * new 8 bit codes
+ *
+ * Revision 1.5 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.4 2000/07/14 21:25:27 gerd
+ * Simplified the type 'collect_warnings'.
+ *
+ * Revision 1.3 2000/07/08 16:23:50 gerd
+ * Added the exception 'Error'.
+ *
+ * Revision 1.2 2000/07/04 22:14:05 gerd
+ * Implemented the changes of rev. 1.2 of pxp_types.mli.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_types.ml:
+ *
+ * Revision 1.7 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5 2000/05/01 20:43:19 gerd
+ * New type output_stream; new function 'write'.
+ *
+ * Revision 1.4 1999/09/01 16:25:35 gerd
+ * Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3 1999/08/15 02:22:33 gerd
+ * Added exception Undeclared.
+ *
+ * Revision 1.2 1999/08/14 22:14:58 gerd
+ * New class "collect_warnings".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_types.mli b/helm/DEVEL/pxp/pxp/pxp_types.mli
new file mode 100644
index 000000000..e8b471170
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_types.mli
@@ -0,0 +1,224 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+type ext_id =
+ System of string
+ | Public of (string * string)
+ | Anonymous
+
+ (* external identifiers are either "system identifiers" (filenames or URLs),
+ * or "public identifiers" Public(id,sysid) where "id" is the representation
+ * of the public ID, and "sysid" a fallback system ID, or the empty string.
+ *
+ * New in PXP: Sometimes the external ID is not known. This case can be
+ * referred to as Anonymous ID.
+ *
+ * Encoding: The identifiers are _always_ encoded as UTF8 strings,
+ * regardless of whether another encoding is configured for the parser.
+ * TODO: implement this
+ *)
+
+
+type dtd_id =
+ External of ext_id (* DTD is completely external *)
+ | Derived of ext_id (* DTD is derived from an external DTD *)
+ | Internal (* DTD is completely internal *)
+;;
+
+type content_model_type =
+ Unspecified (* A specification of the model has not yet been
+ * found
+ *)
+ | Empty (* Nothing is allowed as content *)
+ | Any (* Everything is allowed as content *)
+ | Mixed of mixed_spec list (* The contents consist of elements and PCDATA
+ * in arbitrary order. What is allowed in
+ * particular is given as mixed_spec.
+ *)
+ | Regexp of regexp_spec (* The contents are elements following this regular
+ * expression
+ *)
+
+and mixed_spec =
+ MPCDATA (* PCDATA children are allowed *)
+ | MChild of string (* This kind of Element is allowed *)
+
+and regexp_spec =
+ Optional of regexp_spec (* subexpression? *)
+ | Repeated of regexp_spec (* subexpression* *)
+ | Repeated1 of regexp_spec (* subexpression+ *)
+ | Alt of regexp_spec list (* subexpr1 | subexpr2 | ... | subexprN *)
+ | Seq of regexp_spec list (* subexpr1 , subexpr2 , ... , subexprN *)
+ | Child of string (* This kind of Element is allowed here *)
+;;
+
+
+type att_type =
+ A_cdata (* CDATA *)
+ | A_id (* ID *)
+ | A_idref (* IDREF *)
+ | A_idrefs (* IDREFS *)
+ | A_entity (* ENTITY *)
+ | A_entities (* ENTITIES *)
+ | A_nmtoken (* NMTOKEN *)
+ | A_nmtokens (* NMTOKENS *)
+ | A_notation of string list (* NOTATION (name1 | name2 | ... | nameN) *)
+ | A_enum of string list (* (name1 | name2 | ... | nameN) *)
+;;
+
+
+type att_default =
+ D_required (* #REQUIRED *)
+ | D_implied (* #IMPLIED *)
+ | D_default of string (* <value> -- The value is already expanded *)
+ | D_fixed of string (* #FIXED <value> -- The value is already expanded *)
+;;
+
+
+type att_value =
+ Value of string (* a single value *)
+ | Valuelist of string list (* a list of values *)
+ | Implied_value (* a value left out *)
+;;
+
+
+class type collect_warnings =
+ object
+ method warn : string -> unit
+ end
+;;
+
+
+class drop_warnings : collect_warnings;;
+
+
+type encoding = Netconversion.encoding;;
+ (* We accept all encodings for character sets which are defined in
+ * Netconversion (package netstring).
+ *)
+
+type rep_encoding =
+ (* The subset of 'encoding' that may be used for internal representation
+ * of strings.
+ * Note: The following encodings are ASCII-compatible! This is an important
+ * property used throughout the whole PXP code.
+ *)
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ ]
+;;
+
+
+exception Validation_error of string
+ (* Violation of a validity constraint *)
+
+exception WF_error of string
+ (* Violation of a well-formedness constraint *)
+
+exception Error of string
+ (* Other error *)
+
+exception Character_not_supported (* string_of_exn reports this as "RESTRICTION: Character not supported" *)
+
+exception At of (string * exn)
+ (* The string is a description where the exn happened. The exn value can
+ * again be At(_,_) (for example, when an entity within an entity causes
+ * the error).
+ *)
+
+exception Undeclared
+ (* Indicates that no declaration is available and because of this every
+ * kind of usage is allowed.
+ *)
+
+val string_of_exn : exn -> string
+ (* Converts a PXP exception (or any other exception) into a readable string *)
+
+
+type output_stream = (* destination accepted by 'write' *)
+ Out_buffer of Buffer.t (* write into a buffer *)
+ | Out_channel of out_channel (* write to a channel *)
+ | Out_function of (string -> int -> int -> unit) (* called with the string, pos and len *)
+
+val write : output_stream -> string -> int -> int -> unit
+ (* write os s pos len: Writes the string to the buffer/channel/stream *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.7 2000/07/27 00:41:15 gerd
+ * new 8 bit codes
+ *
+ * Revision 1.6 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.5 2000/07/16 16:34:21 gerd
+ * Updated comments.
+ *
+ * Revision 1.4 2000/07/14 21:25:27 gerd
+ * Simplified the type 'collect_warnings'.
+ *
+ * Revision 1.3 2000/07/08 16:23:50 gerd
+ * Added the exception 'Error'.
+ *
+ * Revision 1.2 2000/07/04 22:08:26 gerd
+ * type ext_id: New variant Anonymous. - The System and Public
+ * variants are now encoded as UTF-8.
+ * collect_warnings is now a class type only. New class
+ * drop_warnings.
+ * New functions encoding_of_string and string_of_encoding.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from Markup_types.mli:
+ *
+ * Revision 1.7 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5 2000/05/01 20:43:25 gerd
+ * New type output_stream; new function 'write'.
+ *
+ * Revision 1.4 1999/09/01 16:25:35 gerd
+ * Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3 1999/08/15 02:22:40 gerd
+ * Added exception Undeclared.
+ *
+ * Revision 1.2 1999/08/14 22:15:17 gerd
+ * New class "collect_warnings".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_utf8.ml b/helm/DEVEL/pxp/pxp/pxp_utf8.ml
new file mode 100644
index 000000000..f0a946251
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_utf8.ml
@@ -0,0 +1,48 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types;;
+open Pxp_lexer_types;;
+
+Pxp_lexers.init_utf8 (* module initialization: hands the UTF-8 lexer functions to Pxp_lexers *)
+ { lex_encoding = `Enc_utf8; (* the encoding this lexer set is for *)
+ scan_document = Pxp_lex_document_utf8.scan_document;
+ scan_content = Pxp_lex_content_utf8.scan_content;
+ scan_within_tag = Pxp_lex_within_tag_utf8.scan_within_tag;
+ scan_document_type = Pxp_lex_document_type_utf8.
+ scan_document_type;
+ scan_declaration = Pxp_lex_declaration_utf8.scan_declaration;
+ scan_content_comment = Pxp_lex_misc_utf8.scan_content_comment;
+ scan_decl_comment = Pxp_lex_misc_utf8.scan_decl_comment;
+ scan_document_comment = Pxp_lex_misc_utf8.scan_document_comment;
+ scan_ignored_section = Pxp_lex_name_string_utf8.scan_ignored_section; (* NOTE(review): taken from the name_string module, unlike the other misc scanners -- confirm *)
+ scan_xml_pi = Pxp_lex_misc_utf8.scan_xml_pi;
+ scan_dtd_string = Pxp_lex_dtd_string_utf8.scan_dtd_string;
+ scan_content_string = Pxp_lex_content_string_utf8.
+ scan_content_string;
+ scan_name_string = Pxp_lex_name_string_utf8.scan_name_string;
+ scan_only_xml_decl = Pxp_lex_misc_utf8.scan_only_xml_decl;
+ scan_for_crlf = Pxp_lex_misc_utf8.scan_for_crlf;
+ }
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/04 20:31:44 gerd
+ * Updated.
+ *
+ * Revision 1.2 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1 2000/05/23 00:08:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_utf8.mli b/helm/DEVEL/pxp/pxp/pxp_utf8.mli
new file mode 100644
index 000000000..42cb033d4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_utf8.mli
@@ -0,0 +1,22 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This is a module without interface. Its initialization part sets up
+ * the UTF-8 lexers.
+ * Link with this module if you want to use the UTF-8 lexers!
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/23 00:08:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_yacc.m2y b/helm/DEVEL/pxp/pxp/pxp_yacc.m2y
new file mode 100644
index 000000000..91de7cd2f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_yacc.m2y
@@ -0,0 +1,2528 @@
+(* $Id$ -*- tuareg -*-
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Parsing
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_dtd
+open Pxp_entity
+open Pxp_document
+open Pxp_aux
+
+(* Some types from the interface definition: *)
+
+exception ID_not_unique      (* NOTE(review): not raised in this part of the file; presumably signalled by an 'index' when the same ID is added twice -- confirm *)
+
+class type [ 'ext ] index =       (* Interface of a table from strings to nodes; the parser object holds an optional one as its ID index *)
+object
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit       (* enters a node under a string key *)
+ method find : string -> 'ext node              (* returns the node entered under a key; NOTE(review): behaviour for unknown keys is not visible here -- confirm *)
+end
+
+
+type config =
+ { warner : collect_warnings;                 (* receives the warnings (config.warner # warn "...") *)
+ errors_with_line_numbers : bool;             (* handed to the external_entity created for the external DTD subset *)
+ enable_pinstr_nodes : bool;
+ enable_super_root_node : bool;               (* true: the bottom of the element stack is a real super root node *)
+ enable_comment_nodes : bool;
+ encoding : rep_encoding;                     (* selects the lexer set; must be equal to the encoding of the DTD *)
+ recognize_standalone_declaration : bool;     (* true: the 'standalone' value of the XML declaration is stored in the DTD *)
+ store_element_positions : bool;
+ idref_pass : bool;
+ validate_by_dfa : bool;
+ accept_only_deterministic_models : bool;
+ debugging_mode : bool;                       (* handed to the entity via set_debugging_mode *)
+ }
+
+type source =          (* Describes where the text to parse comes from *)
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)       (* a function making the entity for a given DTD, and a resolver *)
+ | ExtID of (ext_id * Pxp_reader.resolver)                          (* an external ID and a resolver; from_channel, from_file, and from_string return this form *)
+
+
+type start_symbol =       (* Second argument of the 'parse' method; the names mirror the start symbols of the grammar *)
+ Ext_document             (* cf. ext_document: a complete document *)
+ | Ext_declarations       (* cf. ext_declarations: a sequence of declarations; used for the external subset of a DOCTYPE declaration *)
+ | Ext_element            (* cf. ext_element: a single element *)
+
+
+type context =        (* The token-level state of one run of the parser; built by make_context *)
+ { mutable current : unit -> token; (* get the current token *)
+ mutable get_next : unit -> token; (* go on to the next token; return it *)
+ mutable current_token : token; (* This is the current token *)
+ mutable manager : entity_manager; (* The entity manager *)
+ }
+
+
+let make_context entity_manager =
+  (* Builds the token context for 'entity_manager' and loads the first token.
+   * The entity manager may store a different function in the cell returned
+   * by 'yy_get_next_ref' whenever an entity is opened or closed; therefore
+   * the cell is dereferenced on every call and its contents are never cached.
+   *)
+  let next_ref = entity_manager # yy_get_next_ref in
+  let ctx =
+    { current = (fun _ -> assert false);     (* replaced below *)
+      get_next = (fun _ -> assert false);    (* replaced below *)
+      current_token = Eof;
+      manager = entity_manager } in
+  let advance () =
+    let tok = !next_ref () in
+    ctx.current_token <- tok;
+    tok in
+  ctx.current <- (fun () -> ctx.current_token);
+  ctx.get_next <- advance;
+  ignore (advance ());
+  ctx
+;;
+
+
+let from_channel ?system_encoding ?id:init_id ?fixenc ch =
+
+  (* Returns a source (ExtID) whose resolver delivers the already opened
+   * channel 'ch' on the first access. This is achieved by customizing the
+   * two hooks url_of_id and channel_of_url of Pxp_reader.resolve_as_file.
+   * As the channel can be handed out only once, the hooks answer only
+   * the first access; afterwards they raise Not_competent.
+   *
+   * ?system_encoding: passed through to resolve_as_file
+   * ?id: the external ID the channel is associated with; if missing,
+   *   the source is Anonymous
+   * ?fixenc: returned together with 'ch' by channel_of_url
+   *)
+
+  let file_url_syntax =   (* A syntax suitable for "file" URLs *)
+    { Neturl.null_url_syntax with
+        Neturl.url_enable_scheme = Neturl.Url_part_allowed;
+        Neturl.url_enable_host = Neturl.Url_part_allowed;
+        Neturl.url_enable_path = Neturl.Url_part_required;
+        Neturl.url_accepts_8bits = true;
+    }
+  in
+
+  let root_url =   (* The URL file:/// *)
+    Neturl.make_url
+      ~scheme: "file"
+      ~host: ""
+      ~path: [ "" ]
+      file_url_syntax
+  in
+
+  let channel_handed_out = ref false in
+  (* Whether the first access to this source has already happened, i.e.
+   * whether channel_of_url has returned 'ch'. *)
+
+  (* url_of_id distinguishes three situations:
+   * - The channel has already been handed out: Not_competent is raised,
+   *   i.e. the normal way of determining the URL of the ID is used.
+   * - First access, no init_id: The URL file:/// is passed back. This
+   *   forces that absolute path names /path/dir/... will be interpreted
+   *   as file path names. (But relative path names will not work.)
+   * - First access, init_id given: We can assume that the opened URL is
+   *   exactly this init_id (this is asserted). By raising Not_competent
+   *   it is indicated that the standard method is to be used for the
+   *   interpretation of the URL.
+   *)
+
+  let url_of_id xid =
+    if !channel_handed_out then
+      raise Pxp_reader.Not_competent;
+    (* From here on: this is the first access. *)
+    match init_id with
+      None ->
+        root_url
+    | Some first_id ->
+        assert (first_id = xid);
+        raise Pxp_reader.Not_competent
+  in
+
+  (* channel_of_url:
+   * - On the first call the channel is returned (together with 'fixenc'),
+   *   and channel_handed_out is set.
+   * - On every later call the channel is already being read, and thus
+   *   cannot be opened again. By raising Not_competent it is signaled
+   *   that the resolve_as_file object must not continue to open the URL.
+   * The URL argument is not inspected.
+   *)
+
+  let channel_of_url _ =
+    if !channel_handed_out then
+      raise Pxp_reader.Not_competent;
+    (* First access: remember it, and hand out the channel. *)
+    channel_handed_out := true;
+    (ch, fixenc)
+  in
+
+  (* The resolver with the two customized hooks: *)
+  let resolver =
+    new Pxp_reader.resolve_as_file
+      ?system_encoding:system_encoding
+      ~url_of_id:url_of_id
+      ~channel_of_url:channel_of_url
+      ()
+  in
+
+  (* The ID under which the source is announced. Note: a given 'id' may be
+   * illegal (malformed); in this case, the first invocation of url_of_id
+   * will raise Not_competent, and the 'open_in' method will fail.
+   *)
+  let first_xid =
+    match init_id with
+      None -> Anonymous
+    | Some id -> id
+  in
+
+  ExtID(first_xid, resolver)
+;;
+
+
+let from_file ?system_encoding utf8_filename =
+
+  (* Returns a source for the file 'utf8_filename'. The name is made
+   * absolute (a name not beginning with '/' is taken relative to the
+   * current working directory) and turned into a "file" URL with host
+   * "localhost"; the string form of this URL becomes the System ID of
+   * the source. This function itself does not open the file.
+   *
+   * ?system_encoding: passed through to Pxp_reader.resolve_as_file
+   *)
+
+  let resolver =
+    new Pxp_reader.resolve_as_file ?system_encoding:system_encoding () in
+
+  let is_absolute =
+    String.length utf8_filename > 0 && utf8_filename.[0] = '/' in
+  let abs_filename =
+    if is_absolute then utf8_filename
+    else Sys.getcwd() ^ "/" ^ utf8_filename in
+
+  (* A URL syntax that accepts 8 bit characters: *)
+  let syntax = { Neturl.ip_url_syntax with Neturl.url_accepts_8bits = true } in
+  let path = Neturl.split_path abs_filename in
+  let file_url =
+    Neturl.make_url ~scheme:"file" ~host:"localhost" ~path:path syntax in
+
+  ExtID(System (Neturl.string_of_url file_url), resolver)
+;;
+
+
+let from_string ?fixenc s =
+  (* Returns an anonymous source whose resolver is a
+   * Pxp_reader.resolve_read_this_string object made from 's' (and ?fixenc). *)
+  ExtID(Anonymous, new Pxp_reader.resolve_read_this_string ?fixenc:fixenc s)
+;;
+
+
+(**********************************************************************)
+
+class ['ext] parser_object
+ init_doc init_dtd init_extend_dtd init_config init_resolver init_spec
+ init_process_xmldecl transform_dtd id_index
+ =
+ object (self)
+
+ (* Note that the 'ext parameter has been the motivation to make the
+ * parser a class.
+ *)
+
+ val mutable dtd = init_dtd
+ (* The DTD being parsed; or the DTD currently assumed *)
+
+ val extend_dtd = init_extend_dtd
+ (* Whether the DTD should be extended by ELEMENT, ATTLIST, and
+ * NOTATION declarations or not. (True for validating mode,
+ * false for well-formedness mode.)
+ *)
+
+ val transform_dtd = transform_dtd
+ (* A function transforming the DTD *)
+
+ val id_index = (id_index : 'ext index option)
+ (* The ID index or None *)
+
+ val process_xmldecl = init_process_xmldecl
+ (* Whether the XML declaration is parsed and the found XML version
+ * and standalone declaration are passed to 'doc'.
+ *)
+
+ val lexerset = Pxp_lexers.get_lexer_set (init_config.encoding)
+
+ val doc = init_doc
+ (* The current document *)
+
+ method doc = (doc : 'ext document)       (* read access to the instance variable 'doc', the current document *)
+
+ val resolver = init_resolver
+ (* The resolver for external IDs *)
+
+ val config = init_config
+ (* The current configuration *)
+
+ val elstack = (Stack.create() : ('ext node * entity_id) Stack.t)
+ (* The element stack containing all open elements, i.e. elements that
+ * have begun by a start tag but that have not been finished (end tag).
+ * If the parser sees a start tag, it creates the element and pushes it
+ * on top of this stack. If the parser recognizes an end tag, it pulls
+ * one element from the stack and checks if it has the same name as
+ * given with the end tag.
+ *
+ * At initialization time, a special element is pushed on the stack,
+ * the so-called super root. It is always the bottommost
+ * element of the stack, and serves as a guard.
+ * [See "initializer" below.]
+ *)
+
+ method current =
+   (* Returns the node of the topmost entry of the element stack. *)
+   let (top_node, _) =
+     try Stack.top elstack
+     with Stack.Empty -> assert false
+       (* Not possible, because the super root is always the element
+        * at the bottom of the stack.
+        *)
+   in top_node
+
+ val mutable n_tags_open = 0
+ (* Number of begin tags that have been parsed and whose corresponding
+ * end tags have not yet been parsed
+ *)
+
+ val mutable p_internal_subset = false
+ (* true while parsing the internal subset - there are some additional
+ * constraints for internal subsets, and because of this it must
+ * be known whether the current declaration is contained in the
+ * internal or external subset of the DTD.
+ *)
+
+ val mutable root = None
+ (* Contains the root element (topmost element) while it is being parsed
+ * and after it has been parsed.
+ * This variable is None before the root element is seen.
+ *)
+
+ method root = root       (* read access to 'root'; None before the root element is seen *)
+
+ val spec = init_spec
+ (* A hashtable that contains exemplar objects for the various element
+ * types. If an element is parsed, the exemplar is looked up and
+ * "cloned" (by the "create" method)
+ *)
+
+ val mutable current_data = []
+ (* Collects character data. *)
+
+ method collect_data s =
+ (* Collects the character material 's': it is put in front of 'current_data', so the newest piece comes first *)
+ current_data <- s :: current_data
+
+ method save_data =
+   (* Puts the character material collected in 'current_data' into one
+    * new data node, appends this node as new sub node to 'current', and
+    * empties 'current_data'.
+    *
+    * If the collected text is the empty string, no node is created.
+    *
+    * collect_data puts every new piece in front of 'current_data', so
+    * the list holds the pieces in reverse order. It is reversed before
+    * the pieces are concatenated.
+    *)
+   let flush text =
+     (* Adds the data node for 'text' unless 'text' is empty, and resets
+      * the collection in either case.
+      *)
+     if text <> "" then
+       self # current # add_node (create_data_node spec dtd text);
+     current_data <- []
+   in
+   match current_data with
+     [] ->
+       (* Nothing has been collected, and there is nothing to reset. *)
+       ()
+   | [ single ] ->
+       (* Exactly one piece: It is used as it is, i.e. the string is
+        * not copied.
+        *)
+       flush single
+   | pieces ->
+       (* Several pieces: They are joined into one fresh string, the
+        * oldest piece first; String.concat does the size computation
+        * and the copying.
+        *)
+       flush (String.concat "" (List.rev pieces))
+
+
+ method only_whitespace data =
+   (* Checks that the string "data" contains only whitespace, i.e. that
+    * the name-string lexer scans it as exactly one Ignore token followed
+    * by Eof. On failure, WF_error is raised.
+    *)
+   let lexbuf = Lexing.from_string data in
+   let expect tok =
+     if lexerset.scan_name_string lexbuf <> tok then
+       raise(WF_error("Data not allowed here"))
+   in
+   expect Ignore;
+   expect Eof
+
+ initializer
+ (* CHECKS: The configured encoding and the encoding of the DTD must be the same; otherwise Failure "Encoding mismatch" is raised. *)
+ if config.encoding <> dtd # encoding then
+ failwith("Encoding mismatch");
+
+ (* --- Initialize 'elstack': Push the super-root on the stack. *)
+ let super_root =
+ if config.enable_super_root_node then
+ create_super_root_node spec dtd
+ else
+ (* because spec may not contain an exemplar for the super root: *)
+ create_no_node spec dtd
+ in
+ (* Move the super root or the emulation to the stack: *)
+ Stack.push (super_root, (self :> entity_id)) elstack;       (* the parser object itself serves as the entity ID of this bottommost entry *)
+
+
+
+ (********* Here the method "parse" begins. The grammar below is
+ * transformed to a local function of this method
+ *)
+
+ method parse context start_symbol =
+
+ let parse_ignored_section yy_current yy_get_next =
+ (* A special parser which should be used after "<![IGNORE" is found.
+ * Everything up to the matching Conditional_end token is skipped. *)
+ (* First skip the Ignore tokens that precede the Conditional_body token: *)
+ while yy_current() = Ignore do
+ ignore(yy_get_next());
+ done;
+ (* Now the Conditional_body token is required: *)
+ ( match yy_current() with
+ Conditional_body _ -> ()
+ | _ -> raise Parsing.Parse_error;
+ );
+ (* 'llev' counts the conditional sections that are currently open; the loop ends when the outermost one is closed: *)
+ let en = context.manager # current_entity in
+ let llev = ref 1 in
+ while !llev >= 1 do
+ let igntok = en # next_ignored_token in
+ (* next_ignored_token: uses a special lexer that only
+ * recognizes Conditional_begin and Conditional_end;
+ * other character combinations are ignored.
+ *)
+ (* NOTE: next_ignored_token works much like yy_get_next,
+ * but it does not set the current token!
+ *)
+ match igntok with
+ Conditional_begin _ ->
+ llev := !llev + 1
+ | Conditional_end _ ->
+ llev := !llev - 1;
+ (* Because the loop may be exited now: *)
+ context.current_token <- igntok;
+ | (End_entity | Eof) ->
+ raise Parsing.Parse_error
+ | _ ->
+ ()
+ done;
+
+ in
+
+
+ let check_and_parse_xmldecl xmldecl =
+   (* Only if 'process_xmldecl': checks the XML version found in 'xmldecl',
+    * passes it to 'doc', and evaluates the 'standalone' declaration. *)
+   if process_xmldecl then begin
+     let version, _, standalone_opt = decode_doc_xml_pi (decode_xml_pi xmldecl) in
+     check_version_num version;
+     doc # init_xml_version version;
+     let is_standalone =
+       match standalone_opt with
+         Some "yes" -> true
+       | None | Some "no" -> false
+       | _ -> raise (WF_error("Illegal 'standalone' declaration")) in
+     if config.recognize_standalone_declaration then dtd # set_standalone_declaration is_standalone
+   end
+ in
+
+ let recode_utf8 s =
+   (* Returns 's' recoded from config.encoding to UTF-8. If the configured
+    * encoding is UTF-8 itself, 's' is returned as it is. *)
+   match config.encoding with
+     `Enc_utf8 -> s
+   | _ ->
+       Netconversion.recode_string ~in_enc:(config.encoding :> encoding) ~out_enc:`Enc_utf8 s
+ in
+
+
+%%
+
+/* The following grammar looks similar to ocamlyacc grammars, but
+ * ocamlyacc is actually not used to transform the grammar into a parser.
+ * Instead, the parser generator m2parsergen is applied.
+ *
+ * The format of the grammar is different (see m2parsergen/README),
+ * but I hope that you can understand most features immediately.
+ *
+ * The type of the parser is different: m2parsergen creates a top-down
+ * parser while ocamlyacc generates a LALR-1 parser.
+ *
+ * The way the generated code is called is different: ocamlyacc produces
+ * lots of top-level definitions whereas m2parsergen generates only
+ * a local let-in-phrase. This is explained in the already mentioned
+ * README file.
+ */
+
+/* See Pxp_types.ml for comments to the various tokens */
+
+%token Begin_entity
+%token End_entity
+%token Comment_begin
+%token Comment_end
+%token Ignore
+%token Eq
+%token Rangle
+%token Rangle_empty
+%token <> Conditional_begin
+%token <> Conditional_body
+%token <> Conditional_end
+%token Percent
+%token Plus
+%token Star
+%token Bar
+%token Comma
+%token Qmark
+%token Pcdata
+%token Required
+%token Implied
+%token Fixed
+%token Eof
+
+%token <> Comment_material
+%token <> Doctype
+%token <> Doctype_rangle
+%token <> Dtd_begin
+%token <> Dtd_end
+%token <> Decl_element
+%token <> Decl_attlist
+%token <> Decl_entity
+%token <> Decl_notation
+%token <> Decl_rangle
+%token <> Lparen
+%token <> Rparen
+%token <> RparenPlus
+%token <> RparenStar
+%token <> RparenQmark
+
+%token <> Tag_beg
+%token <> Tag_end
+
+%token <> PI
+%token <> PI_xml
+%token <> Cdata
+%token <> CRef
+%token <> ERef
+%token <> PERef
+%token <> CharData
+%token <> LineEnd
+%token <> Name
+%token <> Nametoken
+%token <> Attval
+%token <> Attval_nl_normalized
+%token <> Unparsed_string
+
+/* START SYMBOLS:
+ *
+ * "ext_document": parses a complete XML document (i.e. an optional
+ * <!DOCTYPE ...> declaration and an element)
+ * "ext_declarations": parses an "external DTD subset", i.e. a sequence
+ * of declarations
+ * "ext_element": parses a single element (no <!DOCTYPE ...> allowed);
+ * the element need not be the root element of the
+ * DTD
+ *
+ * The functions corresponding to these symbols return always () because
+ * they only have side-effects.
+ */
+
+/* SOME GENERAL COMMENTS:
+ *
+ * The parser does not get its tokens from the lexers directly. Instead of
+ * this, there is an entity object between the parser and the lexers. This
+ * object already handles:
+ *
+ * - References to general and parameter entities. The token stream is
+ * modified such that tokens automatically come from the referenced entities.
+ * External parameter entities and all general entities are embraced by
+ * the two special tokens Begin_entity and End_entity. The parser must
+ * check that these braces are correctly nested.
+ */
+
+%%
+
+
+ext_document():        /* Start symbol: a complete document, enclosed in Begin_entity and End_entity. */
+ Begin_entity
+ doc_xmldecl_then_misc_then_prolog_then_rest() End_entity
+ {{
+ if n_tags_open <> 0 then        (* some start tag has not been closed by its end tag *)
+ raise(WF_error("Missing end tag"))
+ }}
+
+
+/* In the following rule, we must find out whether there is an XML declaration
+ * or not, and directly after that either "process_xmldecl" or
+ * "process_missing_xmldecl" of the current entity must be called.
+ * AND IT MUST BE DIRECTLY! Because of this, the invocation is carried out
+ * in the "$" clause immediately following the first token.
+ *
+ * TODO: This is not enough. The first token may be a tag, and the tag
+ * may already contain non-ASCII characters. (But in this case, the resolvers
+ * assume UTF8, and they are right...)
+ */
+
+doc_xmldecl_then_misc_then_prolog_then_rest():        /* The four alternatives differ in what the entity begins with. */
+ pl:PI_xml        /* (1) an XML declaration comes first */
+ $ {{ context.manager # current_entity # process_xmldecl pl;
+ check_and_parse_xmldecl pl;
+ }}
+ misc()* doc_prolog_then_rest()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}        /* (2) no XML declaration; misc material comes first */
+ misc() misc()* doc_prolog_then_rest()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}        /* (3) no XML declaration; the DOCTYPE declaration comes first */
+ doctypedecl() misc()* contents_start()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}        /* (4) no XML declaration; the content begins immediately */
+ contents_start()
+ {{ () }}
+
+
+doc_prolog_then_rest():        /* An optional DOCTYPE declaration with following misc material, then the content. */
+ doctypedecl() misc()* contents_start()
+ {{ () }}
+| contents_start()
+ {{ () }}
+
+
+ext_element():        /* Start symbol: a single element, enclosed in Begin_entity and End_entity. */
+ Begin_entity el_xmldecl_then_misc_then_rest() End_entity
+ {{
+ if n_tags_open <> 0 then        (* some start tag has not been closed by its end tag *)
+ raise(WF_error("Missing end tag"))
+ }}
+
+
+/* See comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
+
+el_xmldecl_then_misc_then_rest():        /* As for documents, but there is no alternative with a DOCTYPE declaration. */
+ pl:PI_xml
+ $ {{ context.manager # current_entity # process_xmldecl pl; }}        /* note: check_and_parse_xmldecl is not called in this rule */
+ misc()* contents_start()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ misc() misc()* contents_start()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ contents_start()
+ {{ () }}
+
+
+ext_declarations():        /* Start symbol for an external DTD subset. */
+ /* Parses a sequence of declarations given by an entity. As side-effect,
+ * the parsed declarations are put into the dtd object.
+ */
+ Begin_entity decl_xmldecl_then_rest()        /* the closing End_entity is consumed by decl_xmldecl_then_rest */
+ {{ () }}
+| Eof
+ {{ () }}
+
+
+decl_xmldecl_then_rest():        /* An optional XML declaration, then declarations up to and including End_entity. */
+ /* Note: This rule is also called from declaration()! */
+ pl:PI_xml
+ $ {{ context.manager # current_entity # process_xmldecl pl;
+ }}
+ declaration()* End_entity
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ declaration() declaration()* End_entity
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ End_entity
+ {{ () }}
+
+
+misc():        /* One piece of misc material: pi(), white space, or comment(). Nothing is stored by this rule itself. */
+ pi()
+ {{ () }}
+| data: CharData
+ /* In this context, the lexers sometimes do not recognize white space;
+ * instead CharData tokens containing white space are delivered.
+ */
+ {{ self # only_whitespace data }}        /* raises WF_error if the data is not white space */
+| Ignore
+ {{ () }}
+| comment()
+ {{ () }}
+
+
+/********************* DOCUMENT TYPE DECLARATION *************************/
+
+doctypedecl():
+ /* parses from <!DOCTYPE to >. As side-effect, first the declarations of
+ * the internal DTD (if any) are put into the dtd object, then the declarations
+ * of the external DTD (if any) are put into this DTD object.
+ */
+ doctype_entid: Doctype
+ ws: Ignore Ignore*
+ doctypedecl_material (doctype_entid)
+ {{ () }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after `DOCTYPE'"))
+ | _ -> raise(WF_error("Bad DOCTYPE declaration"))
+ }}
+
+
+/* TRICK:
+ * ws: Ignore? Ignore*
+ * is meant seriously. The effect is that ws becomes a boolean variable
+ * which is true if there is an Ignore token and false otherwise.
+ * This construct is faster than just
+ * ws: Ignore*
+ * in which case ws becomes an integer variable containing the number of
+ * Ignore tokens. Counting the number of tokens is slower than only checking
+ * the existence.
+ *
+ * We need the information whether there is an Ignore token (representing
+ * white space), because white space is only obligatory if also an identifier
+ * for the external subset is parsed; this conditional syntax constraint is
+ * simply programmed in the body of the grammar rule.
+ */
+
+doctypedecl_material(doctype_entid):        /* Everything after the Doctype token and the white space; doctype_entid is the value of the Doctype token. */
+ root_name: Name
+ ws: Ignore? Ignore*
+ external_subset: external_id()?
+ Ignore*
+ internal_subset: internal_dtd()?
+ Ignore*
+ doctype_rangle_entid: Doctype_rangle
+ {{
+ if doctype_entid != doctype_rangle_entid then        (* physical comparison: both tokens must carry the same entity ID *)
+ raise (Validation_error("Entities not properly nested with DOCTYPE declaration"));
+ dtd # set_root root_name;
+ begin match external_subset, internal_subset with
+ None, None -> () (* no DTD means no ID *)
+ | None, Some _ -> dtd # set_id Internal
+ | Some id, None -> dtd # set_id (External id)
+ | Some id, Some _ -> dtd # set_id (Derived id)
+ end;
+ (* Get now the external doctype declaration. Note that the internal
+ * subset has precedence and must be read first.
+ *)
+ begin match external_subset with
+ None -> ()
+ | Some id ->
+ if not ws then
+ raise(WF_error("Whitespace is missing after `DOCTYPE " ^
+ root_name ^ "'"));
+ let r' = resolver # clone in
+ let pobj =
+ new parser_object
+ (new document config.warner)
+ dtd
+ extend_dtd
+ config
+ r'
+ spec
+ process_xmldecl
+ (fun x -> x)        (* transform_dtd: the identity *)
+ None                (* id_index: no ID index *)
+ in
+ let en = new external_entity r' dtd "[dtd]"
+ config.warner id false config.errors_with_line_numbers
+ config.encoding
+ in
+ en # set_debugging_mode (config.debugging_mode);
+ let mgr = new entity_manager en in
+ en # open_entity true Declaration;
+ try
+ let context = make_context mgr in
+ pobj # parse context Ext_declarations;        (* the external subset is parsed by the fresh parser object, which works on the same dtd *)
+ ignore(en # close_entity);
+ with
+ error ->        (* any exception: close the entity and the cloned resolver, then re-raise together with the position *)
+ ignore(en # close_entity);
+ r' # close_all;
+ let pos = mgr # position_string in
+ raise (At(pos, error))
+ end;
+ dtd # validate
+ }}
+ ? {{
+ match !yy_position with
+ "doctype_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad DOCTYPE declaration"))
+ }}
+
+/* Note that there are no keywords for SYSTEM or PUBLIC, as these would
+ * be difficult to recognize in the lexical contexts. Because of this,
+ * SYSTEM/PUBLIC is parsed as name, and the rule for everything after
+ * SYSTEM/PUBLIC is computed dynamically.
+ */
+
+external_id():        /* Returns the value computed by the selected follow-up rule, i.e. System(...) or Public(...). */
+ tok:Name
+ $ {{
+ let followup =
+ match tok with
+ "SYSTEM" -> parse_system_id
+ (* Apply the rule system_id (below) to parse the
+ * rest of the ID
+ *)
+ | "PUBLIC" -> parse_public_id
+ (* Apply the rule public_id (below) to parse the
+ * rest of the ID
+ *)
+ | _ -> raise(WF_error("SYSTEM or PUBLIC expected"))
+ in
+ }}
+ ws:Ignore Ignore*
+ r:[followup]()
+ {{ r }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after " ^ tok))
+ | _ -> raise(WF_error("Bad SYSTEM or PUBLIC identifier"))
+ }}
+
+
+system_id():        /* The literal after SYSTEM; returns System applied to the literal recoded to UTF-8. */
+ str:Unparsed_string
+ {{ System (recode_utf8 str) }}
+
+
+public_id():        /* The two literals after PUBLIC; returns Public applied to both literals recoded to UTF-8. */
+ str1: Unparsed_string
+ ws: Ignore Ignore*
+ str2: Unparsed_string
+ {{ check_public_id str1;
+ Public(recode_utf8 str1, recode_utf8 str2)
+ }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing between the literals of the PUBLIC identifier"))
+ | _ -> raise(WF_error("Bad PUBLIC identifier"))
+ }}
+
+
+/* The internal subset: "[" declaration* "]". While parsing the declarations
+ * the object variable p_internal_subset must be true; however, if there
+ * are entity references, this variable must be reset to false during
+ * the entity. (See the rule for "declaration" below.)
+ */
+
+internal_dtd():        /* The internal subset: Dtd_begin, declarations, Dtd_end. Raises Validation_error if the two brackets come from different entities. */
+ dtd_begin_entid: internal_dtd_begin()
+ declaration()*
+ dtd_end_entid: internal_dtd_end()
+ {{
+ if dtd_begin_entid != dtd_end_entid then        (* physical comparison of the entity IDs returned by the two bracket rules *)
+ raise(Validation_error("Entities not properly nested with internal DTD subset"))
+ }}
+ ? {{ match !yy_position with
+ "dtd_end_entid" -> raise(WF_error("`]' expected"))
+ | _ -> raise(WF_error("Bad internal DTD subset"))
+ }}
+
+internal_dtd_begin():        /* Switches p_internal_subset on; returns the value of the Dtd_begin token so that the caller can compare it. */
+ entid: Dtd_begin
+ {{ assert (not p_internal_subset);
+ p_internal_subset <- true;
+ entid }}
+
+internal_dtd_end():        /* Switches p_internal_subset off; returns the value of the Dtd_end token so that the caller can compare it. */
+ entid: Dtd_end
+ {{ assert p_internal_subset;
+ p_internal_subset <- false;
+ entid }}
+
+
+declaration():
+ /* Parses a single declaration (or processing instruction). As side-effect
+ * the parsed declaration is stored into the dtd object.
+ */
+ elementdecl()
+ {{ () }}
+| attlistdecl()
+ {{ () }}
+| entid:Decl_entity ws:Ignore Ignore* e:entitydecl(entid)
+ {{ () }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after ENTITY"))
+ | "e" -> raise(WF_error("Name or `%' expected"))
+ | _ -> raise(WF_error("Bad entity declaration"))
+ }}
+| notationdecl()
+ {{ () }}
+| pi: PI        /* a processing instruction is added to the dtd */
+ {{ let target, value = pi in
+ let pi = new proc_instruction target value config.encoding in
+ dtd # add_pinstr pi
+ }}
+| Ignore        /* white space between declarations */
+ {{ () }}
+| Comment_begin Comment_material* ce:Comment_end        /* a comment; its material is dropped */
+ {{ () }}
+ ? {{ match !yy_position with
+ "ce" -> raise(WF_error("`-->' expected"))
+ | _ -> raise(WF_error("Bad comment"))
+ }}
+| Begin_entity        /* an entity containing further declarations begins here */
+ $ {{ (* Set 'p_internal_subset' to 'false' until the matching 'end_entity'
+ * rule is parsed. This allows unrestricted usage of parameter entities
+ * within declarations of internal entities.
+ *)
+ let old_p_internal_subset = p_internal_subset in
+ p_internal_subset <- false;
+ }}
+ decl_xmldecl_then_rest()
+ {{ (* Restore the old value of 'p_internal_subset'. *)
+ p_internal_subset <- old_p_internal_subset;
+ ()
+ }}
+| begin_entid:Conditional_begin        /* a conditional section */
+ $ {{ (* Check whether conditional sections are allowed at this position. *)
+ if p_internal_subset then
+ raise(WF_error("Restriction of the internal subset: Conditional sections not allowed"));
+ }}
+ Ignore*
+ cond:conditional_section() end_entid:Conditional_end
+ {{ (* Check whether Conditional_begin and Conditional_end are in the same
+ * entity. (This restriction is explained in the file SPECS.)
+ *)
+ if begin_entid != end_entid then
+ raise(Validation_error("The first and the last token of conditional sections must be in the same entity (additional restriction of this parser)"));
+ }}
+ ? {{ match !yy_position with
+ "end_entid" -> raise(WF_error("`]]>' expected"))        (* the Conditional_end token is missing *)
+ | "cond" -> raise(WF_error("INCLUDE or IGNORE expected"))
+ | _ -> raise(WF_error("Bad conditional section"))
+ }}
+
+/* The tokens INCLUDE/IGNORE are scanned as names, and the selection of the
+ * right parsing rule is dynamic.
+ * Note that parse_ignored_section is not defined by a grammar rule but
+ * by a conventional let-binding above.
+ */
+
+conditional_section():        /* Reads the keyword and continues with the rule or function selected by it. */
+ include_or_ignore:Name
+ $ {{ let parsing_function =
+ match include_or_ignore with
+ "INCLUDE" -> parse_included_section
+ (* invoke rule "included_section" below *)
+ | "IGNORE" -> parse_ignored_section
+ (* invoke function "parse_ignored_section" *)
+ | _ -> raise(WF_error("INCLUDE or IGNORE expected"))
+ in
+ }}
+ [ parsing_function ] ()
+ {{ () }}
+ ? {{ raise(WF_error("Bad conditional section")) }}
+
+included_section():        /* The rest of an INCLUDE section: optional white space, the Conditional_body token, then declarations. */
+ Conditional_body declaration()*
+ {{ () }}
+| Ignore Ignore* Conditional_body declaration()*
+ {{ () }}
+
+
+/*************************** ELEMENT DECLARATIONS ********************/
+
+elementdecl():
+ /* parses <!ELEMENT ... >. Puts the parsed element type as side-effect into
+ * dtd.
+ */
+ decl_element_entid: Decl_element
+ $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+ }}
+ ws1: Ignore Ignore*
+ name: Name
+ ws2: Ignore Ignore*
+ content_model: contentspec()
+ Ignore*
+ decl_rangle_entid: Decl_rangle
+ {{
+ if decl_element_entid != decl_rangle_entid then
+ raise (Validation_error "Entities not properly nested with ELEMENT declaration");
+ if extend_dtd then begin
+ let el = new dtd_element dtd name in
+ (* It is allowed that an <!ATTLIST...> precedes the corresponding
+ * <!ELEMENT...>. Because of this it is possible that there is already
+ * an element called 'name' in the DTD, and we only must set the content
+ * model of this element.
+ *)
+ try
+ dtd # add_element el;
+ el # set_cm_and_extdecl content_model extdecl;
+ with
+ Not_found -> (* means: there is already an element 'name' *)
+ let el' = dtd # element name in
+ el' # set_cm_and_extdecl content_model extdecl;
+ (* raises Validation_error if el' already has a content model *)
+ end
+ }}
+ ? {{ match !yy_position with
+ ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+ | "name" -> raise(WF_error("The name of the element is expected here"))
+ | "content_model" -> raise(WF_error("Content model expression expected"))
+ | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad element type declaration"))
+ }}
+
+contentspec():
+ /* parses a content model and returns it (type content_model_type) */
+ name: Name /* EMPTY or ANY */
+ {{ match name with
+ "EMPTY" -> Empty
+ | "ANY" -> Any
+ | _ -> raise(WF_error("EMPTY, ANY, or a subexpression expected"))
+ }}
+| entid:Lparen Ignore* term:mixed_or_regexp(entid)        /* a parenthesized expression; the value of the Lparen token is passed down */
+ {{ term }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+/* Many of the following rules have an lparen_entid argument. This is the
+ * internal ID of the entity containing the corresponding left parenthesis;
+ * by comparing it with the ID of the entity of the right parenthesis the
+ * constraint is implemented that both parentheses must be in the same entity.
+ */
+
+mixed_or_regexp(lparen_entid):        /* The material after the left parenthesis: a regular expression (returned wrapped into Regexp) or a mixed model. */
+ re: choice_or_seq(lparen_entid)
+ {{ Regexp re }}
+| m: mixed(lparen_entid)
+ {{ m }}
+
+
+multiplier():        /* The result is the constructor with the same name as the token read. */
+ /* returns one of the multiplier symbols (?,*,+) */
+ Plus
+ {{ Plus }}
+| Star
+ {{ Star }}
+| Qmark
+ {{ Qmark }}
+
+
+mixed (lparen_entid) :        /* A mixed model after the left parenthesis: Pcdata and the alternatives. Returns Mixed with MPCDATA as first member. */
+ Pcdata
+ Ignore*
+ material: mixed_alternatives_top()
+ {{
+ let rest, rparen_entid = material in
+ if lparen_entid != rparen_entid then        (* both parentheses must come from the same entity *)
+ raise (Validation_error "Entities not properly nested with parentheses");
+ Mixed (MPCDATA :: rest)
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+mixed_alternatives_top():        /* Returns the list of the alternatives after Pcdata, and the value of the closing parenthesis token. */
+ entid: Rparen        /* no alternatives, no star */
+ {{ [], entid }}
+| entid: RparenStar        /* no alternatives, with star */
+ {{ [], entid }}
+| Bar Ignore* name:Name Ignore* names:mixed_alternative()* entid:RparenStar        /* with alternatives only RparenStar is accepted */
+ {{
+ (MChild name :: names), entid
+ }}
+ ? {{ match !yy_position with
+ "name" -> raise(WF_error("Name expected"))
+ | "entid" -> raise(WF_error("`)*' expected"))
+ | _ -> raise(WF_error("Bad content model expression"))
+ }}
+
+
+mixed_alternative() :        /* One further alternative of a mixed model: Bar and a name. Returns MChild of the name. */
+ Bar Ignore* name:Name Ignore*
+ {{ MChild name }}
+ ? {{ match !yy_position with
+ "name" -> raise(WF_error("Name expected"))
+ | _ -> raise(WF_error("Bad content model expression"))
+ }}
+
+
+
+choice_or_seq (lparen_entid):
+ /* parses the rest of a parenthesized regular expression (the left
+ * parenthesis has already been read) and returns the expression, built
+ * from Child, Alt, Seq, Optional, Repeated, and Repeated1.
+ * Whether the subexpressions form alternatives or a sequence is decided
+ * by choice_or_seq_factor; all separators must be of the same kind.
+ */
+ re: cp()
+ Ignore*
+ factor: choice_or_seq_factor()
+ {{
+ let (finalmark,subexpr), rparen_entid = factor in
+ if lparen_entid != rparen_entid then
+ raise (Validation_error "Entities not properly nested with parentheses");
+ (* Check that the other subexpressions are "regexp", too, and
+ * merge them with the first.
+ *)
+ let re' =
+ match subexpr with
+ Alt [] -> re
+ | Alt alt -> Alt (re :: alt)
+ | Seq seq -> Seq (re :: seq)
+ | _ -> assert false
+ in
+ (* Interpret the finalmark. *)
+ match finalmark with
+ Ignore -> re'
+ | Plus -> Repeated1 re'
+ | Star -> Repeated re'
+ | Qmark -> Optional re'
+ | _ -> assert false
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+choice_or_seq_factor():
+ /* Parses "|<cp>|<cp>...)" or ",<cp>,<cp>...)", both forms optionally
+ * followed by ?, *, or +.
+ * Returns ((finalmark, expr), rparen_entid), where
+ * - finalmark is Plus, Star, or Qmark for the character after the right parenthesis, or Ignore if there is none
+ * - expr is either
+ * Alt [] meaning that only ")" has been found
+ * Alt non_empty_list meaning that the subexpressions are separated by '|'
+ * Seq non_empty_list meaning that the subexpressions are separated by ','
+ */
+ entid:Rparen
+ {{ (Ignore, Alt []), entid }}
+| entid:RparenPlus
+ {{ (Plus, Alt []), entid }}
+| entid:RparenStar
+ {{ (Star, Alt []), entid }}
+| entid:RparenQmark
+ {{ (Qmark, Alt []), entid }}
+| Bar Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
+ {{
+ let (finalmark, subexpr), rparen_entid = factor in
+ begin match subexpr with
+ Alt [] -> (finalmark, (Alt [re])), rparen_entid
+ | Alt alt -> (finalmark, (Alt (re :: alt))), rparen_entid
+ | _ -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
+ end
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+| Comma Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
+ {{
+ let (finalmark, subexpr), rparen_entid = factor in
+ begin match subexpr with
+ Alt [] -> (finalmark, (Seq [re])), rparen_entid
+ | Seq seq -> (finalmark, (Seq (re :: seq))), rparen_entid
+ | _ -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
+ end
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+cp():
+ /* parse either a name, or a parenthesized subexpression "(...)" */
+ name:Name m:multiplier()?
+ {{ match m with
+ None -> Child name
+ | Some Plus -> Repeated1 (Child name)
+ | Some Star -> Repeated (Child name)
+ | Some Qmark -> Optional (Child name)
+ | _ -> assert false        (* multiplier returns only Plus, Star, or Qmark *)
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+| entid:Lparen Ignore* m:choice_or_seq(entid)        /* the value of the Lparen token is passed down for the nesting check */
+ {{ m }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+/********************* ATTRIBUTE LIST DECLARATION ***********************/
+
+attlistdecl():
+ /* parses <!ATTLIST el_name attdef... >. Enters the attribute list in dtd as side-
+ * effect (only when extend_dtd is set).
+ */
+ decl_attlist_entid: Decl_attlist
+ $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+ }}
+ ws1: Ignore Ignore*
+ el_name: Name
+ ws: Ignore? Ignore*
+ factor: attdef_factor()
+ {{
+ let at_list, decl_rangle_entid = factor in
+
+ if decl_attlist_entid != decl_rangle_entid then (* physical comparison of the two token values *)
+ raise (Validation_error "Entities not properly nested with ATTLIST declaration");
+
+ if not ws && at_list <> [] then begin
+ match at_list with
+ (name,_,_) :: _ ->
+ (* This is normally impossible, because the lexer demands
+ * some other token between two names.
+ *)
+ raise(WF_error("Whitespace is missing before `" ^ name ^ "'"));
+ | _ -> assert false
+ end;
+
+ if extend_dtd then begin
+ let new_el = new dtd_element dtd el_name in
+ (* Note that it is allowed that <!ATTLIST...> precedes the corresponding
+ * <!ELEMENT...> declaration. In this case we add the element declaration
+ * already to the DTD but leave the content model unspecified.
+ *)
+ let el =
+ try
+ dtd # add_element new_el;
+ new_el
+ with
+ Not_found -> (* already added *)
+ let old_el = dtd # element el_name in
+ if old_el # attribute_names <> [] then
+ config.warner # warn ("More than one ATTLIST declaration for element type `" ^
+ el_name ^ "'");
+ old_el
+ in
+ List.iter
+ (fun (a_name, a_type, a_default) ->
+ el # add_attribute a_name a_type a_default extdecl)
+ at_list
+ end
+ }}
+ ? {{ match !yy_position with
+ "ws1" -> raise(WF_error("Whitespace is missing after ATTLIST"))
+ | "el_name" -> raise(WF_error("The name of the element is expected here"))
+ | "factor" -> raise(WF_error("Another attribute name or `>' expected"))
+ | _ -> raise(WF_error("Bad attribute declaration"))
+ }}
+
+
+attdef_factor():
+ /* parses a list of <name,type,default> triples up to the closing `>' and returns
+ * (triples, entid): the (string * att_type * att_default) list and the value of the Decl_rangle token.
+ */
+ attdef:attdef() ws:Ignore? Ignore* factor:attdef_factor()
+ {{
+ let attdef_rest, decl_rangle_entid = factor in
+ if not ws && attdef_rest <> [] then begin
+ match attdef_rest with
+ (name,_,_) :: _ ->
+ raise(WF_error("Missing whitespace before `" ^ name ^ "'"));
+ | _ -> assert false
+ end;
+ (attdef :: attdef_rest), decl_rangle_entid }}
+ ? {{ match !yy_position with
+ | "factor" -> raise(WF_error("Another attribute name or `>' expected"))
+ | _ -> raise(WF_error("Bad attribute declaration"))
+ }}
+| entid:Decl_rangle
+ {{ [], entid }}
+
+
+attdef():
+ /* Parses a single <name,type,default> triple; whitespace is mandatory between the three parts */
+ name: Name
+ ws1: Ignore Ignore*
+ tp: atttype()
+ ws2: Ignore Ignore*
+ default: defaultdecl()
+ {{ (name,tp,default) }}
+ ? {{ match !yy_position with
+ ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+ | "tp" -> raise(WF_error("Type of attribute or `(' expected"))
+ | "default" -> raise(WF_error("#REQUIRED, #IMPLIED, #FIXED or a string literal expected"))
+ | _ -> raise(WF_error("Bad attribute declaration"))
+ }}
+
+atttype():
+ /* Parses an attribute type and returns it as att_type: either a type keyword (given as a Name) or a parenthesized enumeration. */
+ name: Name
+ $ {{ let followup =
+ if name = "NOTATION" then
+ parse_notation
+ else
+ parse_never
+ in (* followup is the rule tried next: a notation list is only attempted after the name NOTATION *)
+ }}
+ nota: [followup]()?
+ {{
+ match name with
+ "CDATA" -> A_cdata
+ | "ID" -> A_id
+ | "IDREF" -> A_idref
+ | "IDREFS" -> A_idrefs
+ | "ENTITY" -> A_entity
+ | "ENTITIES" -> A_entities
+ | "NMTOKEN" -> A_nmtoken
+ | "NMTOKENS" -> A_nmtokens
+ | "NOTATION" ->
+ (match nota with
+ None -> raise(WF_error("Error in NOTATION type (perhaps missing whitespace after NOTATION?)"))
+ | Some n -> n
+ )
+ | _ -> raise(WF_error("One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected"))
+ }}
+ ? {{ raise(WF_error("Bad attribute declaration (perhaps missing whitespace after NOTATION)")) }}
+
+| Lparen
+ Ignore*
+ name: name_or_nametoken()
+ Ignore*
+ names: nmtoken_factor()*
+ rp: Rparen
+ /* Enumeration: the first token plus all tokens that follow a bar */
+ {{ A_enum(name :: names) }}
+ ? {{ match !yy_position with
+ "name" -> raise(WF_error("Name expected"))
+ | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
+ | "rp" -> raise(WF_error("`|' and more names expected, or `)'"))
+ | _ -> raise(WF_error("Bad enumeration type"))
+ }}
+
+
+never():
+ /* The always failing rule: its $ action raises Not_found. Presumably this is what atttype refers to as parse_never - TODO confirm the generated name. */
+ $ {{ raise Not_found; }}
+ Doctype /* questionable */
+ {{ A_cdata (* Does not matter *)
+ }}
+
+
+notation():
+ Ignore Ignore* /* Parses mandatory whitespace, then "(" name ("|" name)* ")"; returns A_notation of all names */
+ lp: Lparen
+ Ignore*
+ name: Name
+ Ignore*
+ names: notation_factor()*
+ rp: Rparen
+ {{ A_notation(name :: names) }}
+ ? {{ match !yy_position with
+ "lp" -> raise(WF_error("`(' expected"))
+ | "name" -> raise(WF_error("Name expected"))
+ | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
+ | "rp" -> raise(WF_error("`|' and more names expected, or `)'"))
+ | _ -> raise(WF_error("Bad NOTATION type"))
+ }}
+
+
+notation_factor():
+ /* Parse "| name" (with optional whitespace around the name) and return the name */
+ Bar Ignore* name:Name Ignore*
+ {{ name }}
+ ? {{ match !yy_position with
+ "name" -> raise(WF_error("Name expected"))
+ | _ -> raise(WF_error("Bad NOTATION type"))
+ }}
+
+nmtoken_factor():
+ /* Parse "| nmtoken" (with optional whitespace around the token) and return the nmtoken */
+ Bar Ignore* n:name_or_nametoken() Ignore*
+ {{ n }}
+ ? {{ match !yy_position with
+ "n" -> raise(WF_error("Nametoken expected"))
+ | _ -> raise(WF_error("Bad enumeration type"))
+ }}
+
+
+name_or_nametoken():
+ n:Name /* either token kind is accepted; its value is returned unchanged */ {{ n }}
+| n:Nametoken {{ n }}
+
+
+/* The default values must be expanded and normalized. This has been implemented
+ * by the function expand_attvalue.
+ */
+
+
+defaultdecl():
+ /* Parse the default value for an attribute (#REQUIRED, #IMPLIED, #FIXED literal, or a plain literal) and return it as att_default */
+ Required
+ {{ D_required }}
+| Implied
+ {{ D_implied }}
+| Fixed ws:Ignore Ignore* str:Unparsed_string
+ {{ D_fixed (expand_attvalue lexerset dtd str config.warner false) }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after #FIXED"))
+ | "str" -> raise(WF_error("String literal expected"))
+ | _ -> raise(WF_error("Bad #FIXED default value"))
+ }}
+| str:Unparsed_string
+ {{ D_default (expand_attvalue lexerset dtd str config.warner false) }}
+
+
+/**************************** ENTITY DECLARATION ***********************/
+
+entitydecl(decl_entity_entid):
+ /* parses everything _after_ <!ENTITY up to and including the closing `>'. The parsed
+ * entity declaration is entered into the dtd object as side-effect. decl_entity_entid is compared physically with the value of the closing Decl_rangle token.
+ */
+ name: Name
+ $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+ }}
+ ws: Ignore Ignore*
+ material: entitydef()
+ Ignore*
+ decl_rangle_entid: Decl_rangle
+ /* A general entity */
+ {{
+ if decl_entity_entid != decl_rangle_entid then
+ raise (Validation_error "Entities not properly nested with ENTITY declaration");
+ let en =
+ (* Distinguish between
+ * - internal entities
+ * - external entities
+ * - NDATA (unparsed) entities
+ *)
+ match material with
+ (Some s, None, None) ->
+ new internal_entity dtd name config.warner s p_internal_subset
+ config.errors_with_line_numbers false config.encoding
+ | (None, Some xid, None) ->
+ new external_entity (resolver # clone) dtd name config.warner
+ xid false config.errors_with_line_numbers
+ config.encoding
+
+ | (None, Some xid, Some n) ->
+ (new ndata_entity name xid n config.encoding :> entity)
+ | _ -> assert false
+ in
+ dtd # add_gen_entity en extdecl
+ }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing"))
+ | "material" -> raise(WF_error("String literal or identifier expected"))
+ | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad entity declaration"))
+ }}
+
+| Percent
+ $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+ }}
+ ws1: Ignore Ignore*
+ name: Name
+ ws2: Ignore Ignore*
+ material: pedef()
+ Ignore*
+ decl_rangle_entid: Decl_rangle
+ /* A parameter entity */
+ {{
+ if decl_entity_entid != decl_rangle_entid then
+ raise (Validation_error "Entities not properly nested with ENTITY declaration");
+ let en =
+ (* Distinguish between internal and external entities *)
+ match material with
+ (Some s, None) ->
+ new internal_entity dtd name config.warner s p_internal_subset
+ config.errors_with_line_numbers true config.encoding
+ | (None, Some xid) ->
+ new external_entity (resolver # clone) dtd name config.warner
+ xid true config.errors_with_line_numbers
+ config.encoding
+ | _ -> assert false
+ in
+
+ (* The following two lines force that even internal entities count
+ * as external (for the standalone check) if the declaration of
+ * the internal entity occurs in an external entity.
+ *)
+ if extdecl then
+ en # set_counts_as_external;
+
+ dtd # add_par_entity en;
+ }}
+ ? {{ match !yy_position with
+ ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+ | "material" -> raise(WF_error("String literal or identifier expected"))
+ | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad entity declaration"))
+ }}
+
+
+entitydef():
+ /* parses the definition value of a general entity. Returns either:
+ * - (Some s, None, None) meaning the definition of an internal entity
+ * with (literal) value s has been found
+ * - (None, Some x, None) meaning that an external parsed entity with
+ * external ID x has been found
+ * - (None, Some x, Some n) meaning that an unparsed entity with
+ * external ID x and notation name n (as returned by ndatadecl) has been found
+ */
+ str:Unparsed_string
+ {{ Some str, None, None }}
+| id:external_id() ws:Ignore? Ignore* decl:ndatadecl()?
+ {{ if not ws && decl <> None then
+ raise(WF_error("Whitespace missing before `NDATA'"));
+ None, Some id, decl
+ }}
+
+
+pedef():
+ /* parses the definition value of a parameter entity. Returns either:
+ * - (Some s, None) meaning that the definition of an internal entity
+ * with (literal) value s has been found
+ * - (None, Some x) meaning that an external ID x has been found (no NDATA part is accepted by this rule)
+ */
+ str:Unparsed_string
+ {{ Some str, None }}
+| id:external_id()
+ {{ None, Some id }}
+
+
+ndatadecl():
+ /* Parses NDATA followed by whitespace and a name; returns that name as it is.
+ * The option (Some name / None) seen by entitydef comes from its use of this rule as "ndatadecl()?".
+ */
+ ndata:Name ws:Ignore Ignore* name:Name
+ {{ if ndata = "NDATA" then
+ name
+ else
+ raise(WF_error("NDATA expected"))
+ }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after NDATA"))
+ | "name" -> raise(WF_error("Name expected"))
+ | _ -> raise(WF_error("Bad NDATA declaration"))
+ }}
+
+/**************************** NOTATION DECLARATION *******************/
+
+notationdecl():
+ /* parses <!NOTATION name SYSTEM|PUBLIC literal [literal] > and enters the notation declaration into the
+ * dtd object as side-effect (only when extend_dtd is set)
+ */
+ decl_notation_entid: Decl_notation
+ ws1: Ignore Ignore*
+ name: Name
+ ws2: Ignore Ignore*
+ sys_or_public: Name /* SYSTEM or PUBLIC */
+ ws3: Ignore Ignore*
+ str1: Unparsed_string
+ ws: Ignore? Ignore*
+ str2: Unparsed_string?
+ Ignore*
+ decl_rangle_entid: Decl_rangle
+ {{
+ if decl_notation_entid != decl_rangle_entid then
+ raise (Validation_error "Entities not properly nested with NOTATION declaration");
+ let xid =
+ (* Note that it is allowed that PUBLIC is only followed by one
+ * string literal
+ *)
+ match sys_or_public with
+ "SYSTEM" ->
+ if str2 <> None then raise(WF_error("SYSTEM must be followed only by one argument"));
+ System (recode_utf8 str1)
+ | "PUBLIC" ->
+ begin match str2 with
+ None ->
+ check_public_id str1;
+ Public(recode_utf8 str1,"")
+ | Some p ->
+ if not ws then
+ raise(WF_error("Missing whitespace between the string literals of the `PUBLIC' id"));
+ check_public_id str1;
+ Public(recode_utf8 str1, recode_utf8 p)
+ end
+ | _ -> raise(WF_error("PUBLIC or SYSTEM expected"))
+ in
+ if extend_dtd then begin
+ let no = new dtd_notation name xid config.encoding in
+ dtd # add_notation no
+ end
+ }}
+ ? {{ match !yy_position with
+ ("ws1"|"ws2"|"ws3") -> raise(WF_error("Whitespace is missing"))
+ | "name" -> raise(WF_error("Name expected"))
+ | "sys_or_public" -> raise(WF_error("SYSTEM or PUBLIC expected"))
+ | ("str1"|"str2") -> raise(WF_error("String literal expected"))
+ | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad NOTATION declaration"))
+ }}
+
+/****************************** ELEMENTS **************************/
+
+/* In the following rules, the number of error rules is reduced to
+ * improve the performance of the parser.
+ */
+
+
+contents_start():
+ /* parses <element>...</element> misc*, i.e. exactly one element followed
+ * optionally by white space or processing instructions.
+ * The element is entered into the global variables as follows:
+ * - If elstack is non-empty, the parsed element is added as new child to
+ * the top element of the stack.
+ * - If elstack is empty, the root_exemplar object is modified rather than
+ * that a new element is created. If additionally the variable root is
+ * None, it is assigned Some root_exemplar.
+ * Note that the modification of the root_exemplar is done by the method
+ * internal_init.
+ * The reason why the root element is modified rather than newly created
+ * is a typing requirement. It must be possible that the class of the root
+ * is derived from the original class element_impl, i.e. the user must be
+ * able to add additional methods. If we created a new root object, we
+ * would have to denote to which class the new object belongs; the root
+ * would always be an 'element_impl' object (and not a derived object).
+ * If we instead cloned an exemplar object and modified it by the
+ * "create" method, the root object would belong to the same class as the
+ * exemplar (good), but the type of the parsing function would always
+ * state that an 'element_impl' was created (because we can pass the new
+ * object only back via a global variable). The only solution is to
+ * modify the object that has been passed to the parsing function directly.
+ */
+ $ {{ dtd <- transform_dtd dtd; }}
+ start_tag() content()*
+ {{ () }}
+
+
+content():
+ /* parses: start tags, end tags, character data, character references,
+ * processing instructions, entity references, or comments. That the tags are properly nested is dynamically checked.
+ * As result, recognized elements are added to their parent elements,
+ * content is added to the element containing it, and processing instructions
+ * are entered into the element embracing them. (All as side-effects.)
+ */
+ start_tag()
+ {{ () }}
+| end_tag()
+ {{ () }}
+| char_data()
+ {{ () }}
+| cref()
+ {{ () }}
+| pi()
+ {{ () }}
+| entity_ref()
+ {{ () }}
+| comment()
+ {{ () }}
+
+
+entity_ref():
+ Begin_entity eref_xmldecl_then_rest() /* the contents of a referenced entity, bracketed by Begin_entity ... End_entity */
+ {{ if n_tags_open = 0 then (* no element is open when the entity has been read *)
+ raise(WF_error("Entity reference not allowed here"))
+ }}
+
+
+/* See comment for doc_mldecl_then_misc_then_prolog_then_rest. */
+
+eref_xmldecl_then_rest():
+ pl:PI_xml /* case 1: the entity begins with a PI_xml token, which is passed to the current entity */
+ $ {{ context.manager # current_entity # process_xmldecl pl;
+ }}
+ content()* End_entity
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ content() content()* End_entity /* case 2: no PI_xml token, at least one content item */
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ End_entity /* case 3: no PI_xml token and no content at all */
+ {{ () }}
+
+
+start_tag():
+ /* parses <element attribute-values> or <element attribute-values/>.
+ *
+ * EFFECT: If elstack is non-empty, the element is added to the
+ * top element of the stack as new child, and the element
+ * is pushed on the stack. If elstack is empty, the root_exemplar is
+ * modified and gets the parsed name and attribute list. The root_exemplar
+ * is pushed on the stack. If additionally the variable root is empty, too,
+ * this variable is initialized.
+ * If the <element ... /> form has been parsed, no element is pushed
+ * on the stack.
+ */
+ tag: Tag_beg
+ $ {{ let position =
+ if config.store_element_positions then
+ Some(context.manager # position)
+ else
+ None
+ in
+ }}
+ ws: Ignore? Ignore*
+ attlist: attribute()*
+ emptiness: start_tag_rangle()
+ /* Note: it is guaranteed that there is whitespace between Tag_beg and
+ * the name of the first attribute, because there must be some separator.
+ * So we need not to check ws!
+ */
+ {{
+ let rec check_attlist al =
+ match al with
+ (nv1, num1) :: al' -> (* num1 is the whitespace flag returned by the attribute rule *)
+ if not num1 && al' <> [] then begin
+ match al with
+ ((n1,_),_) :: ((n2,_),_) :: _ ->
+ raise(WF_error("Whitespace is missing between attributes `" ^
+ n1 ^ "' and `" ^ n2 ^ "'"))
+ | _ -> assert false
+ end;
+ check_attlist al'
+ | [] -> ()
+ in
+ check_attlist attlist;
+
+ let name, tag_beg_entid = tag in
+ let attlist' = List.map (fun (nv,_) -> nv) attlist in
+ let d =
+ create_element_node ?position:position spec dtd name attlist' in
+
+ begin match id_index with
+ None -> ()
+ | Some idx ->
+ (* Put the ID attribute into the index, if present *)
+ begin try
+ let v = d # id_attribute_value in (* may raise Not_found *)
+ idx # add v d (* may raise ID_not_unique *)
+ with
+ Not_found ->
+ (* No ID attribute *)
+ ()
+ | ID_not_unique ->
+ (* There is already an ID with the same value *)
+ raise(Validation_error("ID not unique"))
+ end
+ end;
+
+ if n_tags_open = 0 then begin
+ if root = None then begin
+ (* We have found the begin tag of the root element. *)
+ if config.enable_super_root_node then begin
+ (* The user wants the super root instead of the real root.
+ * The real root element becomes the child of the VR.
+ *)
+ (* Assertion: self # current is the super root *)
+ assert (self # current # node_type = T_super_root);
+ root <- Some (self # current);
+ self # current # add_node d;
+ doc # init_root (self # current);
+ end
+ else begin
+ (* Normal behaviour: The user wants to get the real root. *)
+ root <- Some d;
+ doc # init_root d;
+ end;
+ end
+ else
+ (* We have found a second topmost element. This is illegal. *)
+ raise(WF_error("Document must consist of only one toplevel element"))
+ end
+ else begin
+ (* We have found some inner begin tag. *)
+ self # save_data; (* Save outstanding data material first *)
+ self # current # add_node d
+ end;
+
+ if emptiness then
+ (* An empty tag like <a/>. *)
+ d # local_validate ~use_dfa:config.validate_by_dfa ()
+ else begin
+ (* A non-empty tag. *)
+ Stack.push (d, tag_beg_entid) elstack;
+ n_tags_open <- n_tags_open + 1;
+ end;
+ }}
+ ? {{ match !yy_position with
+ "attlist" -> raise(WF_error("Bad attribute list"))
+ | "emptiness" -> raise(WF_error("`>' or `/>' expected"))
+ | _ -> raise(WF_error("Bad start tag"))
+ }}
+
+
+attribute():
+ /* Parses name="value"; returns ((name, value), ws) where ws is the result of the optional trailing Ignore (start_tag uses it as whitespace flag) */
+ n:Name Ignore* Eq Ignore* v:attval() ws:Ignore? Ignore*
+ {{ (n,v), ws }}
+
+
+attval():
+ v:Attval /* both token kinds go through expand_attvalue; only the last (boolean) argument differs */
+ {{ expand_attvalue lexerset dtd v config.warner true }}
+| v:Attval_nl_normalized /* NOTE(review): the flag presumably controls newline normalization - confirm in expand_attvalue */
+ {{ expand_attvalue lexerset dtd v config.warner false }}
+
+
+start_tag_rangle():
+ Rangle /* end of a start tag; the result is the emptiness flag used by start_tag */ {{ false }}
+| Rangle_empty /* the empty-element form */ {{ true }}
+
+
+end_tag():
+ /* parses </element>.
+ * Pops the top element from the elstack and checks if it is the same
+ * element (same name, and same entity value as recorded by the start tag).
+ */
+ tag:Tag_end Ignore* Rangle
+ {{ let name, tag_end_entid = tag in
+ if n_tags_open = 0 then
+ raise(WF_error("End-tag without start-tag"));
+
+ self # save_data; (* Save outstanding data material first *)
+
+ let x, tag_beg_entid = Stack.pop elstack in
+ let x_name =
+ match x # node_type with
+ | T_element n -> n
+ | _ -> assert false
+ in
+ if name <> x_name then
+ raise(WF_error("End-tag does not match start-tag"));
+ if tag_beg_entid != tag_end_entid then
+ raise(WF_error("End-tag not in the same entity as the start-tag"));
+ x # local_validate ~use_dfa:config.validate_by_dfa ();
+
+ n_tags_open <- n_tags_open - 1;
+
+ assert (n_tags_open >= 0);
+
+ }}
+
+char_data():
+ /* Parses any literal characters not otherwise matching, and adds the
+ * characters to the top element of elstack.
+ * If elstack is empty, it is assumed that there is no surrounding
+ * element, and any non-white space character is forbidden.
+ */
+ data:CharData
+ {{
+ if n_tags_open = 0 then
+ (* only white space is allowed *)
+ self # only_whitespace data
+ else
+ self # collect_data data
+ (* We collect the chardata material until the next end tag is
+ * reached. Then the collected material will be concatenated and
+ * stored as a single T_data node (see end_tag rule above)
+ * using save_data.
+ *)
+ }}
+| data:Cdata
+ {{
+ if n_tags_open = 0 then
+ raise (WF_error("CDATA section not allowed here"));
+ self # collect_data data
+ (* Also collect CDATA material *)
+ }}
+
+cref():
+ /* Parses &#...; and adds the character to the top element of elstack. */
+ code:CRef
+ {{
+ if n_tags_open = 0 then
+ (* No surrounding element: character references are not allowed *)
+ raise(WF_error("Character reference not allowed here"));
+ self # collect_data (character config.encoding config.warner code)
+ (* Also collect character references *)
+ }}
+
+pi():
+ /* Parses <?...?> (but not <?xml ...?>, which is the separate PI_xml token).
+ * If there is a top element in elstack, the processing instruction is added
+ * to this element; outside of any element (and without super root) it is added to doc.
+ */
+ pi: PI
+ {{
+ let position =
+ if config.store_element_positions then
+ Some(context.manager # position)
+ else
+ None
+ in
+ let target,value = pi in
+
+ if n_tags_open = 0 && not config.enable_super_root_node
+ then
+ doc # add_pinstr (new proc_instruction target value config.encoding)
+ else begin
+ (* Special case: if processing instructions are processed inline,
+ * they are wrapped into T_pinstr nodes.
+ *)
+ if config.enable_pinstr_nodes then begin
+ self # save_data; (* Save outstanding data material first *)
+ let pinstr = new proc_instruction target value config.encoding in
+ let wrapper = create_pinstr_node
+ ?position:position spec dtd pinstr in
+ wrapper # local_validate(); (* succeeds always *)
+ self # current # add_node wrapper;
+ end
+ else
+ (* Normal behaviour: Add the PI to the parent element. *)
+ self # current # add_pinstr
+ (new proc_instruction target value config.encoding)
+ end
+ }}
+
+
+comment():
+ /* Parses <!-- ... --> and, if config.enable_comment_nodes is set, adds a comment node
+ * to the current node; otherwise the comment text is not stored. */
+ Comment_begin
+ $ {{
+ let position =
+ if config.enable_comment_nodes && config.store_element_positions then
+ Some(context.manager # position)
+ else
+ None
+ in
+ }}
+ mat: Comment_material*
+ ce: Comment_end
+ {{
+ if config.enable_comment_nodes then begin
+ self # save_data; (* Save outstanding data material first *)
+ let comment_text = String.concat "" mat in
+ let wrapper = create_comment_node
+ ?position:position spec dtd comment_text in
+ wrapper # local_validate(); (* succeeds always *)
+ self # current # add_node wrapper;
+ end
+ }}
+ ? {{ match !yy_position with
+ | "ce" -> raise(WF_error("`-->' expected"))
+ | _ -> raise(WF_error("Bad comment"))
+ }}
+
+
+%%
+ (* The method "parse" continues here... *)
+
+ try
+ match start_symbol with
+ Ext_document ->
+ parse_ext_document context.current context.get_next
+ | Ext_declarations ->
+ parse_ext_declarations context.current context.get_next
+ | Ext_element ->
+ parse_ext_element context.current context.get_next
+ with
+ Not_found ->
+ raise Parsing.Parse_error
+
+ (*********** The method "parse" ends here *************)
+
+
+(**********************************************************************)
+
+(* Here ends the class definition: *)
+end
+;;
+
+(**********************************************************************)
+
+open Pxp_reader;;
+
+
+class default_ext = (* Minimal extension object: it only remembers the node it has been attached to. *)
+ object(self)
+ val mutable node = (None : ('a extension node as 'a) option)
+ method clone = {< >} (* copy of this object with all fields unchanged *)
+ method node =
+ match node with
+ None ->
+ assert false (* node was requested before set_node has been called *)
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+ end
+;;
+
+
+let default_extension = new default_ext;; (* the single extension instance handed to every exemplar of default_spec *)
+
+let default_spec = (* Specification without element-specific classes: every node kind uses a stock exemplar. *)
+ make_spec_from_mapping
+ ~super_root_exemplar: (new element_impl default_extension)
+ ~comment_exemplar: (new element_impl default_extension)
+ ~default_pinstr_exemplar: (new element_impl default_extension)
+ ~data_exemplar: (new data_impl default_extension)
+ ~default_element_exemplar: (new element_impl default_extension)
+ ~element_mapping: (Hashtbl.create 1) (* empty table: no per-element exemplars *)
+ ()
+;;
+
+
+let idref_pass id_index root =
+  (* Second pass over a parsed tree: every value of every attribute listed
+   * by idref_attribute_names must be known to id_index.  The first unknown
+   * ID aborts the walk with Validation_error, which is wrapped in At(...)
+   * unless the line number reported by the node is 0.
+   *)
+  let error t att value =
+    let elname =
+      match t # node_type with
+          T_element n -> n
+        | _ -> assert false
+    in
+    let msg =
+      String.concat ""
+        [ "Attribute `"; att; "' of element `"; elname;
+          "' refers to unknown ID `"; value; "'" ]
+    in
+    match t # position with
+        (_, 0, _) ->
+          raise(Validation_error msg)
+      | (ent, line, col) ->
+          let where =
+            String.concat ""
+              [ "In entity "; ent; " at line "; string_of_int line;
+                ", position "; string_of_int col; ":\n" ]
+          in
+          raise(At(where, Validation_error msg))
+  in
+  let lookup t att id =
+    (* Only the presence of the ID matters; the node found is dropped. *)
+    try ignore(id_index # find id) with Not_found -> error t att id
+  in
+  let rec check_tree t =
+    List.iter
+      (fun att ->
+         match t # attribute att with
+             Value s -> lookup t att s
+           | Valuelist l -> List.iter (lookup t att) l
+           | Implied_value -> ())
+      (t # idref_attribute_names);
+    List.iter check_tree (t # sub_nodes)
+  in
+  check_tree root
+;;
+
+
+
+exception Return_DTD of dtd;;
+ (* Used by extract_dtd_from_document_entity to jump out of the parser; call_parser re-raises it unchanged after closing the entity *)
+
+
+let call_parser ~configuration:cfg (* Common driver of all parse_* functions: sets up entity, resolver and parser object, runs the parser, optionally checks IDREFs, and returns the parser object. *)
+ ~source:src
+ ~dtd
+ ~extensible_dtd
+ ~document:doc
+ ~specification:spec
+ ~process_xmldecl
+ ~transform_dtd
+ ~(id_index : 'ext #index option)
+ ~use_document_entity
+ ~entry
+ ~init_lexer =
+ let e = cfg.errors_with_line_numbers in
+ let w = cfg.warner in
+ let r, en = (* r: the resolver; en: the toplevel entity to parse *)
+ match src with
+ Entity(m,r') -> r', m dtd
+ | ExtID(xid,r') -> r',
+ if use_document_entity then
+ new document_entity
+ r' dtd "[toplevel]" w xid e
+ cfg.encoding
+ else
+ new external_entity
+ r' dtd "[toplevel]" w xid false e
+ cfg.encoding
+ in
+ r # init_rep_encoding cfg.encoding;
+ r # init_warner w;
+ en # set_debugging_mode (cfg.debugging_mode);
+ let pobj =
+ new parser_object
+ doc
+ dtd
+ extensible_dtd
+ cfg
+ r
+ spec
+ process_xmldecl
+ transform_dtd
+ (id_index :> 'ext index option)
+ in
+ let mgr = new entity_manager en in
+ en # open_entity true init_lexer;
+ begin try
+ let context = make_context mgr in
+ pobj # parse context entry;
+ ignore(en # close_entity);
+ with
+ Return_DTD d -> (* deliberate early exit: passed on without position information *)
+ ignore(en # close_entity);
+ raise(Return_DTD d)
+ | error -> (* any other exception is wrapped together with the current position string *)
+ ignore(en # close_entity);
+ r # close_all;
+ let pos = mgr # position_string in
+ raise (At(pos, error))
+ end;
+ if cfg.idref_pass then begin (* the IDREF check needs both an index and a root node *)
+ match id_index with
+ None -> ()
+ | Some idx ->
+ ( match pobj # root with
+ None -> ()
+ | Some root ->
+ idref_pass idx root;
+ )
+ end;
+ pobj
+
+
+let parse_dtd_entity cfg src =
+ (* Parse a DTD given as separate entity. Returns the validated dtd object; the parser object is not needed and therefore dropped. *)
+ let dtd = new dtd cfg.warner cfg.encoding in
+ let doc = new document cfg.warner in
+ ignore(
+ call_parser
+ ~configuration:cfg
+ ~source:src
+ ~dtd:dtd
+ ~extensible_dtd:true (* Extend the DTD by parsed declarations *)
+ ~document:doc
+ ~specification:default_spec
+ ~process_xmldecl:false (* The XML declaration is ignored
+ * (except 'encoding')
+ *)
+ ~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
+ ~id_index: None
+ ~use_document_entity:false
+ ~entry:Ext_declarations (* Entry point of the grammar *)
+ ~init_lexer:Declaration (* The initially used lexer *)
+ );
+ dtd # validate;
+ if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
+ dtd
+;;
+
+
+let parse_content_entity ?id_index cfg src dtd spec =
+ (* Parse an element given as separate entity, using the passed dtd. Returns the root node; raises WF_error if the parser produced none. *)
+ dtd # validate; (* ensure that the DTD is valid *)
+ if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
+ let doc = new document cfg.warner in
+ let pobj =
+ call_parser
+ ~configuration:cfg
+ ~source:src
+ ~dtd:dtd
+ ~extensible_dtd:true (* Extend the DTD by parsed declarations *)
+ ~document:doc
+ ~specification:spec
+ ~process_xmldecl:false (* The XML declaration is ignored
+ * (except 'encoding')
+ *)
+ ~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
+ ~id_index:(id_index :> 'ext index option)
+ ~use_document_entity:false
+ ~entry:Ext_element (* Entry point of the grammar *)
+ ~init_lexer:Content (* The initially used lexer *)
+ in
+ match pobj # root with
+ Some r -> r
+ | None -> raise(WF_error("No root element"))
+;;
+
+
+let parse_wfcontent_entity cfg src spec = (* Like parse_content_entity, but with a fresh dtd put into allow_arbitrary mode and without ID index. *)
+ let dtd = new dtd cfg.warner cfg.encoding in
+ dtd # allow_arbitrary;
+ let doc = new document cfg.warner in
+ let pobj =
+ call_parser
+ ~configuration:cfg
+ ~source:src
+ ~dtd:dtd
+ ~extensible_dtd:false (* Do not extend the DTD *)
+ ~document:doc
+ ~specification:spec
+ ~process_xmldecl:false (* The XML declaration is ignored
+ * (except 'encoding')
+ *)
+ ~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
+ ~id_index:None
+ ~use_document_entity:false
+ ~entry:Ext_element (* Entry point of the grammar *)
+ ~init_lexer:Content (* The initially used lexer *)
+ in
+ match pobj # root with
+ Some r -> r (* the root node found by the parser *)
+ | None -> raise(WF_error("No root element"))
+;;
+
+
+let iparse_document_entity ?(transform_dtd = (fun x -> x))
+ ?id_index
+ cfg0 src spec p_wf =
+ (* Parse a complete document (grammar entry Ext_document) and return the document object *)
+ (* p_wf: 'true' if in well-formedness mode, 'false' if in validating mode *)
+ let cfg = { cfg0 with
+ recognize_standalone_declaration =
+ cfg0.recognize_standalone_declaration && (not p_wf)
+ } in
+ let dtd = new dtd cfg.warner cfg.encoding in
+ if p_wf then
+ dtd # allow_arbitrary;
+ let doc = new document cfg.warner in
+ let pobj =
+ call_parser
+ ~configuration:cfg
+ ~source:src
+ ~dtd:dtd
+ ~extensible_dtd:(not p_wf) (* Extend the DTD by parsed declarations
+ * only if in validating mode
+ *)
+ ~document:doc
+ ~specification:spec
+ ~process_xmldecl:true (* The XML declaration is processed *)
+ (* TODO: change to 'not p_wf' ? *)
+ ~transform_dtd:(fun dtd ->
+ let dtd' = transform_dtd dtd in
+ if cfg.accept_only_deterministic_models then
+ dtd' # only_deterministic_models;
+ dtd')
+
+ ~id_index:(id_index :> 'ext index option)
+ ~use_document_entity:true
+ ~entry:Ext_document (* Entry point of the grammar *)
+ ~init_lexer:Document (* The initially used lexer *)
+ in
+ pobj # doc
+;;
+
+
+let parse_document_entity ?(transform_dtd = (fun x -> x)) (* validating front end of iparse_document_entity (p_wf = false) *)
+ ?id_index
+ cfg src spec =
+ iparse_document_entity
+ ~transform_dtd:transform_dtd
+ ?id_index:(id_index : 'ext #index option :> 'ext index option)
+ cfg src spec false;;
+
+let parse_wfdocument_entity cfg src spec = (* well-formedness front end of iparse_document_entity (p_wf = true) *)
+ iparse_document_entity cfg src spec true;;
+
+let extract_dtd_from_document_entity cfg src = (* Returns the dtd of a document without building the document tree. *)
+ let transform_dtd dtd = raise (Return_DTD dtd) in (* aborts parsing as soon as the parser calls transform_dtd *)
+ try
+ let doc = parse_document_entity
+ ~transform_dtd:transform_dtd
+ cfg
+ src
+ default_spec in
+ (* Should not happen: *)
+ doc # dtd
+ with
+ Return_DTD dtd ->
+ (* The normal case: *)
+ dtd
+;;
+
+
+let default_config = (* Baseline configuration record; callers can derive their own with the record update syntax. *)
+ let w = new drop_warnings in
+ { warner = w;
+ errors_with_line_numbers = true;
+ enable_pinstr_nodes = false; (* processing instructions are not wrapped into nodes of their own *)
+ enable_super_root_node = false; (* the root of the tree is the real root element *)
+ enable_comment_nodes = false; (* comments do not become nodes *)
+ encoding = `Enc_iso88591;
+ recognize_standalone_declaration = true;
+ store_element_positions = true; (* positions are recorded when nodes are created *)
+ idref_pass = false; (* call_parser does not run idref_pass *)
+ validate_by_dfa = true; (* passed as ~use_dfa to local_validate *)
+ accept_only_deterministic_models = true; (* dtd # only_deterministic_models is invoked *)
+ debugging_mode = false;
+ }
+
+
+class [ 'ext ] hash_index =
+object
+  constraint 'ext = 'ext node #extension
+  (* Backing table: ID string -> node registered under that ID. *)
+  val ht = (Hashtbl.create 100 : (string, 'ext node) Hashtbl.t)
+  (* Register n under s; an s that is already present is rejected with
+   * ID_not_unique and the table is left as it was. *)
+  method add s n =
+    if Hashtbl.mem ht s then raise ID_not_unique
+    else Hashtbl.add ht s n
+  (* Look up the node for s; Not_found from Hashtbl.find propagates. *)
+  method find s = Hashtbl.find ht s
+  (* Hand out the table itself, not a copy. *)
+  method index = ht
+end
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.14 2000/08/26 23:23:14 gerd
+ * Bug: from_file must not interpret the file name as URL path.
+ * Bug: When PI and comment nodes are generated, the collected data
+ * material must be saved first.
+ *
+ * Revision 1.13 2000/08/19 21:30:03 gerd
+ * Improved the error messages of the parser
+ *
+ * Revision 1.12 2000/08/18 20:16:25 gerd
+ * Implemented that Super root nodes, pinstr nodes and comment
+ * nodes are included into the document tree.
+ *
+ * Revision 1.11 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.10 2000/07/23 02:16:33 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.9 2000/07/14 13:57:29 gerd
+ * Added the id_index feature.
+ *
+ * Revision 1.8 2000/07/09 17:52:45 gerd
+ * New implementation for current_data.
+ * The position of elements is stored on demand.
+ *
+ * Revision 1.7 2000/07/09 01:00:35 gerd
+ * Improvement: It is now guaranteed that only one data node
+ * is added for consecutive character material.
+ *
+ * Revision 1.6 2000/07/08 16:27:29 gerd
+ * Cleaned up the functions calling the parser.
+ * New parser argument: transform_dtd.
+ * Implementations for 'extract_dtd_from_document_entity' and
+ * 'parse_wfcontent_entity'.
+ *
+ * Revision 1.5 2000/07/06 23:05:18 gerd
+ * Initializations of resolvers were missing.
+ *
+ * Revision 1.4 2000/07/06 22:11:01 gerd
+ * Fix: The creation of the non-virtual root element is protected
+ * in the same way as the virtual root element.
+ *
+ * Revision 1.3 2000/07/04 22:15:18 gerd
+ * Change: Using the new resolver capabilities.
+ * Still incomplete: the new extraction and parsing functions.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_yacc.m2y:
+ *
+ * Revision 1.9 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.8 2000/05/27 19:26:19 gerd
+ * Change: The XML declaration is interpreted right after
+ * it has been parsed (no longer after the document): new function
+ * check_and_parse_xmldecl.
+ * When elements, attributes, and entities are declared
+ * it is stored whether the declaration happens in an external
+ * entity (for the standalone check).
+ * The option recognize_standalone_declaration is interpreted.
+ *
+ * Revision 1.7 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.6 2000/05/14 21:51:24 gerd
+ * Change: Whitespace is handled by the grammar, and no longer
+ * by the entity.
+ *
+ * Revision 1.5 2000/05/14 17:50:54 gerd
+ * Updates because of changes in the token type.
+ *
+ * Revision 1.4 2000/05/11 22:09:17 gerd
+ * Fixed the remaining problems with conditional sections.
+ * This seems to be also a weakness of the XML spec!
+ *
+ * Revision 1.3 2000/05/09 00:02:44 gerd
+ * Conditional sections are now recognized by the parser.
+ * There seem some open questions; see the TODO comments!
+ *
+ * Revision 1.2 2000/05/08 22:01:44 gerd
+ * Introduced entity managers (see markup_entity.ml).
+ * The XML declaration is now recognized by the parser. If such
+ * a declaration is found, the method process_xmldecl of the currently
+ * active entity is called. If the first token is not an XML declaration,
+ * the method process_missing_xmldecl is called instead.
+ * Some minor changes.
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * COPIED FROM REVISION 1.19 OF markup_yacc.mly
+ *
+ * Revision 1.19 2000/05/01 15:20:08 gerd
+ * "End tag matches start tag" is checked before "End tag in the
+ * same entity as start tag".
+ *
+ * Revision 1.18 2000/04/30 18:23:08 gerd
+ * Bigger change: Introduced the concept of virtual roots. First,
+ * this reduces the number of checks. Second, it makes it possible to
+ * return the virtual root to the caller instead of the real root (new
+ * config options 'virtual_root' and 'processing_instructions_inline').
+ * Minor changes because of better CR/CRLF handling.
+ *
+ * Revision 1.17 2000/03/13 23:47:46 gerd
+ * Updated because of interface changes. (See markup_yacc_shadow.mli
+ * rev. 1.8)
+ *
+ * Revision 1.16 2000/01/20 20:54:43 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.15 1999/12/17 22:27:58 gerd
+ * Bugfix: The value of 'p_internal_subset' (an instance
+ * variable of the parser object) is set to true when the internal subset
+ * begins, and is set to false when this subset ends. The error was
+ * that references to external entities within this subset did not
+ * set 'p_internal_subset' to false; this is now corrected by introducing
+ * the 'p_internal_subset_stack'.
+ * This is a typical example of how the code gets more and
+ * more complicated and that it is very difficult to really understand
+ * what is going on.
+ *
+ * Revision 1.14 1999/11/09 22:23:37 gerd
+ * Removed the invocation of "init_dtd" of the root document.
+ * This method is no longer available. The DTD is also passed to the
+ * document object by the root element, so nothing essential changes.
+ *
+ * Revision 1.13 1999/10/25 23:37:09 gerd
+ * Bugfix: The warning "More than one ATTLIST declaration for element
+ * type ..." is only generated if an ATTLIST is found while there are already
+ * attributes for the element.
+ *
+ * Revision 1.12 1999/09/01 23:08:38 gerd
+ * New frontend function: parse_wf_document. This simply uses
+ * a DTD that allows anything, and by the new parameter "extend_dtd" it is
+ * avoided that element, attlist, and notation declarations are added to this
+ * DTD. The idea is that this function simulates a well-formedness parser.
+ * Tag_beg, Tag_end carry the entity_id. The "elstack" stores the
+ * entity_id of the stacked tag. This was necessary because otherwise there
+ * are some examples that produce incorrectly nested elements.
+ * p_internal_subset is a variable that stores whether the internal
+ * subset is being parsed. This is important because entity declarations in
+ * internal subsets are not allowed to contain parameter references.
+ * It is checked if the "elstack" is empty after all has been parsed.
+ * Processing instructions outside DTDs and outside elements are now
+ * added to the document.
+ * The rules of mixed and regexp style content models have been
+ * separated. The code is now much simpler.
+ * Entity references outside elements are detected and rejected.
+ *
+ * Revision 1.11 1999/09/01 16:26:08 gerd
+ * Improved the quality of error messages.
+ *
+ * Revision 1.10 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.9 1999/08/15 20:42:01 gerd
+ * Corrected a misleading message.
+ *
+ * Revision 1.8 1999/08/15 20:37:34 gerd
+ * Improved error messages.
+ * Bugfix: While parsing document entities, the subclass document_entity is
+ * now used instead of external_entity. The rules in document entities are a bit
+ * stronger.
+ *
+ * Revision 1.7 1999/08/15 14:03:59 gerd
+ * Empty documents are not allowed.
+ * "CDATA section not allowed here" is a WF_error, not a Validation_
+ * error.
+ *
+ * Revision 1.6 1999/08/15 02:24:19 gerd
+ * Removed some grammar rules that were used for testing.
+ * Documents without DTD can now have arbitrary elements (formerly
+ * they were not allowed to have any element).
+ *
+ * Revision 1.5 1999/08/14 22:57:20 gerd
+ * It is allowed that external entities are empty because the
+ * empty string is well-parsed for both declarations and contents. Empty
+ * entities can be referenced anywhere because the references are replaced
+ * by nothing. Because of this, the Begin_entity...End_entity brace is only
+ * inserted if the entity is non-empty. (Otherwise references to empty
+ * entities would not be allowed anywhere.)
+ * As a consequence, the grammar has been changed such that a
+ * single Eof is equivalent to Begin_entity,End_entity without content.
+ *
+ * Revision 1.4 1999/08/14 22:20:01 gerd
+ * The "config" slot has now a component "warner" which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Furthermore, there is a new component "debugging_mode".
+ * Some Parse_error exceptions have been changed into Validation_error.
+ * The interfaces of functions/classes imported from other modules
+ * have changed; the invocations have been adapted.
+ * Contents may contain CDATA sections that have been forgotten.
+ *
+ * Revision 1.3 1999/08/11 15:00:41 gerd
+ * The Begin_entity ... End_entity brace is also possible in
+ * 'contents'.
+ * The configuration passed to the parsing object contains always
+ * the resolver that is actually used.
+ *
+ * Revision 1.2 1999/08/10 21:35:12 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_yacc.mli b/helm/DEVEL/pxp/pxp/pxp_yacc.mli
new file mode 100644
index 000000000..cb987a8a6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/pxp_yacc.mli
@@ -0,0 +1,488 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+(*$ markup-yacc.mli *)
+
+open Pxp_types
+open Pxp_dtd
+open Pxp_document
+
+exception ID_not_unique
+
+class type [ 'ext ] index =
+object
+ (* The type of indexes over the ID attributes of the elements. This type
+ * is the minimum requirement needed by the parser to create such an index.
+ *)
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ (* Add the passed node to the index. If there is already an ID with
+ * the passed string value, the exception ID_not_unique should be
+ * raised. (But the index is free also to accept several identical IDs.)
+ *)
+ method find : string -> 'ext node
+ (* Finds the node with the passed ID value, or raises Not_found *)
+end
+;;
+
+
+class [ 'ext ] hash_index :
+object
+ (* This is a simple implementation of 'index' using a hash table. *)
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ (* See above. *)
+ method find : string -> 'ext node
+ (* See above. *)
+ method index : (string, 'ext node) Hashtbl.t
+ (* Returns the hash table. *)
+end
+;;
+
+
+type config =
+ { warner : collect_warnings;
+ (* An object that collects warnings. *)
+
+ errors_with_line_numbers : bool;
+ (* Whether error messages contain line numbers or not. The parser
+ * is 10 to 20 per cent faster if line numbers are turned off;
+ * you get only byte positions in this case.
+ *)
+
+ enable_pinstr_nodes : bool;
+ (* true: turns a special mode for processing instructions on. Normally,
+ * you cannot determine the exact location of a PI; you only know
+ * in which element the PI occurs. This mode makes it possible
+ * to find the exact location out: Every PI is artificially wrapped
+ * by a special node with type T_pinstr. For example, if the XML text
+ * is <a><?x?><?y?></a>, the parser normally produces only an element
+ * object for "a", and puts the PIs "x" and "y" into it (without
+ * order). In this mode, the object "a" will contain two objects
+ * with type T_pinstr, and the first object will contain "x", and the
+ * second "y": the object tree looks like
+ * - Node with type = T_element "a"
+ * - Node with type = T_pinstr "x"
+ * + contains processing instruction "x"
+ * - Node with type = T_pinstr "y"
+ * + contains processing instruction "y"
+ *
+ * Notes:
+ * (1) In past versions of PXP this mode was called
+ * processing_instructions_inline, and it produced nodes of
+ * type T_element "-pi" instead of T_pinstr.
+ * (2) The T_pinstr nodes are created from the pinstr exemplars
+ * in your spec
+ *)
+
+ enable_super_root_node : bool;
+ (* true: the topmost element of the XML tree is not the root element,
+ * but the so-called super root. The root element is a son of the
+ * super root. The super root is a node with type T_super_root.
+ * The following behaviour changes, too:
+ * - PIs occurring outside the root element and outside the DTD are
+ * added to the super root instead of the document object
+ * - If enable_pinstr_nodes is also turned on, the PI wrappers
+ * are added to the super root
+ *
+ * For example, the document
+ * <?x?><a>y</a><?y?>
+ * is normally represented by:
+ * - document object
+ * + contains PIs x and y
+ * - reference to root node with type = T_element "a"
+ * - node with type = T_data: contains "y"
+ * With enabled super root node:
+ * - document object
+ * - reference to super root node with type = T_super_root
+ * + contains PIs x and y
+ * - root node with type = T_element "a"
+ * - node with type = T_data: contains "y"
+ * If also enable_pinstr_nodes:
+ * - document object
+ * - reference to super root node with type = T_super_root
+ * - node with type = T_pinstr "x"
+ * + contains PI "x"
+ * - root node with type = T_element "a"
+ * - node with type = T_data: contains "y"
+ * - node with type = T_pinstr "y"
+ * + contains PI "y"
+ * Notes:
+ * (1) In previous versions of PXP this mode was called
+ * virtual_root, and it produced an additional node of type
+ * T_element "-vr" instead of T_super_root.
+ * (2) The T_super_root node is created from the super root exemplar
+ * in your spec.
+ *)
+
+ enable_comment_nodes : bool;
+ (* When enabled, comments are represented as nodes with type =
+ * T_comment.
+ * To access the contents of comments, use the method "comment"
+ * for the comment nodes.
+ * These nodes behave like elements; however, they are normally
+ * empty and do not have attributes. Note that it is possible to
+ * add children to comment nodes and to set attributes, but it is
+ * strongly recommended not to do so. There are no checks on
+ * such abnormal use, because they would cost too
+ * much time, even when no comment nodes are generated at all.
+ *
+ * Comment nodes should be disabled unless you must parse a
+ * third-party XML text which uses comments as another data
+ * container.
+ *
+ * The nodes of type T_comment are created from the comment exemplars
+ * in your spec.
+ *)
+
+ encoding : rep_encoding;
+ (* Specifies the encoding used for the *internal* representation
+ * of any character data.
+ * Note that the default is still Enc_iso88591.
+ *)
+
+ recognize_standalone_declaration : bool;
+ (* Whether the "standalone" declaration is recognized or not.
+ * This option does not have an effect on well-formedness parsing:
+ * in this case such declarations are never recognized.
+ *
+ * Recognizing the "standalone" declaration means that the
+ * value of the declaration is scanned and passed to the DTD,
+ * and that the "standalone-check" is performed.
+ *
+ * Standalone-check: If a document is flagged standalone='yes'
+ * some additional constraints apply. The idea is that a parser
+ * without access to any external document subsets can still parse
+ * the document, and will still return the same values as the parser
+ * with such access. For example, if the DTD is external and if
+ * there are attributes with default values, it is checked that there
+ * is no element instance where these attributes are omitted - the
+ * parser would return the default value but this requires access to
+ * the external DTD subset.
+ *)
+
+ store_element_positions : bool;
+ (* Whether the file name, the line and the column of the
+ * beginning of elements are stored in the element nodes.
+ * This option may be useful to generate error messages.
+ *
+ * Positions are only stored for:
+ * - Elements
+ * - Wrapped processing instructions (see enable_pinstr_nodes)
+ * For all other node types, no position is stored.
+ *
+ * You can access positions by the method "position" of nodes.
+ *)
+
+ idref_pass : bool;
+ (* Whether the parser does a second pass and checks that all
+ * IDREF and IDREFS attributes contain valid references.
+ * This option works only if an ID index is available. To create
+ * an ID index, pass an index object as id_index argument to the
+ * parsing functions (such as parse_document_entity; see below).
+ *
+ * "Second pass" does not mean that the XML text is again parsed;
+ * only the existing document tree is traversed, and the check
+ * on bad IDREF/IDREFS attributes is performed for every node.
+ *)
+
+ validate_by_dfa : bool;
+ (* If true, and if DFAs are available for validation, the DFAs will
+ * actually be used for validation.
+ * If false, or if no DFAs are available, the standard backtracking
+ * algorithm will be used.
+ * DFA = deterministic finite automaton.
+ *
+ * DFAs are only available if accept_only_deterministic_models is
+ * "true" (because in this case, it is relatively cheap to construct
+ * the DFAs). DFAs are a data structure which ensures that validation
+ * can always be performed in linear time.
+ *
+ * I strongly recommend using DFAs; however, there are examples
+ * for which validation by backtracking is faster.
+ *)
+
+ accept_only_deterministic_models : bool;
+ (* Whether only deterministic content models are accepted in DTDs. *)
+
+ (* The following options are not implemented, or only for internal
+ * use.
+ *)
+
+ debugging_mode : bool;
+ }
+
+
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+
+val from_channel :
+ ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding ->
+ in_channel -> source
+
+val from_string :
+ ?fixenc:encoding -> string -> source
+
+val from_file :
+ ?system_encoding:encoding -> string -> source
+
+(* Notes on sources (version 2):
+ *
+ * Sources specify where the XML text to parse comes from. Sources not only
+ * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
+ * names), and they are interpreted as a specific encoding of characters.
+ * A source should be associated with an external ID, because otherwise
+ * it is not known how to handle relative names.
+ *
+ * There are two primary sources, Entity and ExtID, and several functions
+ * for derived sources. First explanations for the functions:
+ *
+ * from_channel: The XML text is read from an in_channel. By default, the
+ * channel is not associated with an external ID, and it is impossible
+ * to resolve relative SYSTEM IDs found in the document.
+ * If the ?id argument is passed, it is assumed that the channel has this
+ * external ID. If relative SYSTEM IDs occur in the document, they can
+ * be interpreted; however, it is only possible to read from "file:"
+ * IDs.
+ * By default, the channel automatically detects the encoding. You can
+ * set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_string: The XML text is read from a string.
+ * It is impossible to read from any external entity whose reference is found
+ * in the string.
+ * By default, the encoding of the string is detected automatically. You can
+ * set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_file: The XML text is read from the file whose file name is
+ * passed to the function (as UTF-8 string).
+ * Relative system IDs can be interpreted by this function.
+ * The ?system_encoding argument specifies the character encoding used
+ * for file names (sic!). By default, UTF-8 is assumed.
+ *
+ * Examples:
+ *
+ * from_file "/tmp/file.xml":
+ * reads from this file, which is assumed to have the ID
+ * SYSTEM "file://localhost/tmp/file.xml".
+ *
+ * let ch = open_in "/tmp/file.xml" in
+ * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
+ * This does the same, but uses a channel.
+ *
+ * from_channel ~id:(System "http://host/file.xml")
+ * ch
+ * reads from the channel ch, and it is assumed that the ID is
+ * SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
+ * it will be interpreted relative to this location; however, there is
+ * no way to read via HTTP.
+ * If there is any "file:" SYSTEM ID, it is possible to read the file.
+ *
+ * The primary sources:
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ * entity to read from is passed to the resolver, and the resolver finds
+ * the entity and opens it.
+ * The intention of this option is to allow customized
+ * resolvers to interpret external identifiers without any restriction.
+ * The Pxp_reader module contains several classes allowing the user to
+ * compose such a customized resolver from predefined components.
+ *
+ * ExtID is the interface of choice for own extensions to resolvers.
+ *
+ * - Entity(m,r): You can implement every behaviour by using a customized
+ * entity class. Once the DTD object d is known that will be used during
+ * parsing, the entity e = m d is determined and used together with the
+ * resolver r.
+ * This is only for hackers.
+ *)
+
+
+
+val default_config : config
+ (* - Warnings are thrown away
+ * - Error messages will contain line numbers
+ * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
+ * - The internal encoding is ISO-8859-1
+ * - The standalone declaration is checked
+ * - Element positions are stored
+ * - The IDREF pass is left out
+ * - If available, DFAs are used for validation
+ * - Only deterministic content models are accepted
+ *)
+
+val default_extension : ('a node extension) as 'a
+ (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_spec : ('a node extension as 'a) spec
+ (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+ (* Parse an entity containing a DTD (external subset), and return this DTD. *)
+
+val extract_dtd_from_document_entity : config -> source -> dtd
+ (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and returns the DTD contained in the document.
+ * The parts of the document outside the DTD are actually not parsed,
+ * i.e. parsing stops when all declarations of the DTD have been read.
+ *)
+
+val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config -> source -> 'ext spec -> 'ext document
+ (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and validate the contents of the document against the DTD contained
+ * and/or referenced in the document.
+ *
+ * If the optional argument ~transform_dtd is passed, the following
+ * modification applies: After the DTD (both the internal and external
+ * subsets) has been parsed, the function ~transform_dtd is called,
+ * and the resulting DTD is actually used to validate the document.
+ *
+ * If the optional argument ~transform_dtd is missing, the parser
+ * behaves in the same way as if the identity were passed as ~transform_dtd.
+ *
+ * If the optional argument ~id_index is present, the parser adds
+ * any ID attribute to the passed index. An index is required to detect
+ * violations of the uniqueness of IDs.
+ *)
+
+val parse_wfdocument_entity :
+ config -> source -> 'ext spec -> 'ext document
+ (* Parse a closed document (see parse_document_entity), but do not
+ * validate it. Only checks on well-formedness are performed.
+ *)
+
+val parse_content_entity :
+ ?id_index:('ext index) ->
+ config -> source -> dtd -> 'ext spec -> 'ext node
+ (* Parse a file representing a well-formed fragment of a document. The
+ * fragment must be a single element (i.e. something like <a>...</a>;
+ * not a sequence like <a>...</a><b>...</b>). The element is validated
+ * against the passed DTD, but it is not checked whether the element is
+ * the root element specified in the DTD.
+ *
+ * If the optional argument ~id_index is present, the parser adds
+ * any ID attribute to the passed index. An index is required to detect
+ * violations of the uniqueness of IDs.
+ *)
+
+val parse_wfcontent_entity :
+ config -> source -> 'ext spec -> 'ext node
+ (* Parse a file representing a well-formed fragment of a document
+ * (see parse_content_entity). The fragment is not validated, only
+ * checked for well-formedness.
+ *)
+
+
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/18 20:15:43 gerd
+ * Config options:
+ * - enable_super_root_nodes: new name for virtual_root
+ * - enable_pinstr_nodes: new name for processing_instructions_inline
+ * - enable_comment_nodes: new option
+ * Updated comments for various options.
+ *
+ * Revision 1.6 2000/07/23 02:16:33 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.5 2000/07/14 13:57:29 gerd
+ * Added the id_index feature.
+ *
+ * Revision 1.4 2000/07/09 17:52:54 gerd
+ * New option store_element_positions.
+ *
+ * Revision 1.3 2000/07/08 16:26:21 gerd
+ * Added the signatures of the functions
+ * 'extract_dtd_from_document_entity' and 'parse_wfcontent_entity'.
+ * Updated the signature of 'parse_document_entity': New optional
+ * argument 'transform_dtd'.
+ * Updated the comments.
+ *
+ * Revision 1.2 2000/07/04 22:09:03 gerd
+ * MAJOR CHANGE: Redesign of the interface (not yet complete).
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_yacc.mli:
+ *
+ * Revision 1.4 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.3 2000/05/27 19:24:01 gerd
+ * New option: recognize_standalone_declaration.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ * Revision 1.9 2000/04/30 18:23:38 gerd
+ * New config options 'processing_instructions_inline' and
+ * 'virtual_root'.
+ *
+ * Revision 1.8 2000/03/13 23:46:46 gerd
+ * Change: The 'resolver' component of the 'config' type has
+ * disappeared. Instead, there is a new resolver component in the Entity
+ * and ExtID values of 'source'. I hope that this makes clearer that the
+ * resolver has only an effect if used together with Entity and ExtID
+ * sources.
+ * Change: The Entity value can now return the entity dependent
+ * on the DTD that is going to be used.
+ *
+ * Revision 1.7 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/02/22 01:52:45 gerd
+ * Added documentation.
+ *
+ * Revision 1.5 2000/01/20 20:54:43 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.4 1999/09/01 23:09:10 gerd
+ * New function parse_wf_entity that simulates a well-formedness
+ * parser.
+ *
+ * Revision 1.3 1999/09/01 16:26:36 gerd
+ * Added an empty line. This is *really* a big change.
+ *
+ * Revision 1.2 1999/08/14 22:20:27 gerd
+ * The "config" slot has now a component "warner" which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Furthermore, there is a new component "debugging_mode".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/MISSING_TESTS b/helm/DEVEL/pxp/pxp/rtests/MISSING_TESTS
new file mode 100644
index 000000000..386830b86
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/MISSING_TESTS
@@ -0,0 +1,6 @@
+- Conditional sections:
+
+ Conditional_begin and Conditional_end must be in the same entity.
+
+- NDATA: check whether ENTITY attributes refer only to declared
+ NDATA entities
diff --git a/helm/DEVEL/pxp/pxp/rtests/Makefile b/helm/DEVEL/pxp/pxp/rtests/Makefile
new file mode 100644
index 000000000..653aaf282
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/Makefile
@@ -0,0 +1,32 @@
+.PHONY: all
+all:
+ $(MAKE) -C reader
+ $(MAKE) -C write
+ $(MAKE) -C codewriter
+ $(MAKE) -C canonxml
+ $(MAKE) -C negative
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C reader clean
+ $(MAKE) -C write clean
+ $(MAKE) -C codewriter clean
+ $(MAKE) -C canonxml clean
+ $(MAKE) -C negative clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f dumpfiles
+ $(MAKE) -C reader distclean
+ $(MAKE) -C write distclean
+ $(MAKE) -C codewriter distclean
+ $(MAKE) -C canonxml distclean
+ $(MAKE) -C negative distclean
+
+dumpfiles: dumpfiles.ml
+ ocamlc -o dumpfiles dumpfiles.ml
diff --git a/helm/DEVEL/pxp/pxp/rtests/README b/helm/DEVEL/pxp/pxp/rtests/README
new file mode 100644
index 000000000..5c56b7af5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/README
@@ -0,0 +1,16 @@
+----------------------------------------------------------------------
+(Anti) Regression tests
+----------------------------------------------------------------------
+
+- To build the tests, "markup" must already be compiled in ..
+ Do "make" to start the compilation.
+
+- To run the tests:
+ ./run
+
+- Program dumpfiles: Do "make dumpfiles" to create it.
+ It takes XML file names on the command line, and writes a Latex
+ document on stdout. The document shows the contents of all files.
+ EXAMPLE:
+ $ ./dumpfiles canonxml/data_jclark_valid/ext-sa/*.* >x.tex
+ $ latex x
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/Makefile b/helm/DEVEL/pxp/pxp/rtests/canonxml/Makefile
new file mode 100644
index 000000000..afbefb390
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/Makefile
@@ -0,0 +1,27 @@
+# make validate: make bytecode executable
+# make validate.opt: make native executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_canonxml: test_canonxml.ml
+ ocamlfind ocamlc -g -custom -o test_canonxml -package .,str -linkpkg test_canonxml.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out.xml
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f test_canonxml
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/README b/helm/DEVEL/pxp/pxp/rtests/canonxml/README
new file mode 100644
index 000000000..c4c3303b9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/README
@@ -0,0 +1,20 @@
+----------------------------------------------------------------------
+Regression test "canonxml":
+----------------------------------------------------------------------
+
+- An XML file is parsed, and the contents are printed in a canonical
+ format.
+
+- The output is compared with a reference file. The test passes
+ only if the output and the reference are equal.
+
+- Test data "data_jclark_valid":
+ Contains the samples by James Clark that are valid. The subdirectories:
+ - sa: standalone documents
+ - not-sa: non-standalone documents (with external DTD)
+ - ext-sa: non-standalone documents (with other external entities)
+
+ Tests that do not pass have been moved into the *-problems directories.
+ Typically the reason is that they use characters outside the
+ Latin 1 character set.
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.ent
new file mode 100644
index 000000000..0b7088ec6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.ent
@@ -0,0 +1 @@
+Data
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.xml
new file mode 100644
index 000000000..e4cc432f7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.ent
new file mode 100644
index 000000000..45f6d8e74
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.ent
@@ -0,0 +1 @@
+Data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.xml
new file mode 100644
index 000000000..2ee598889
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.ent
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.xml
new file mode 100644
index 000000000..407a4a1ee
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.ent
new file mode 100644
index 000000000..0b7088ec6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.ent
@@ -0,0 +1 @@
+Data
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.xml
new file mode 100644
index 000000000..c3cdbd0f6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.ent
new file mode 100644
index 000000000..c6e97f821
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.ent
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.xml
new file mode 100644
index 000000000..66b8a9166
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.ent
new file mode 100644
index 000000000..2846b57df
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.ent
@@ -0,0 +1,4 @@
+Data
+
+More data
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.xml
new file mode 100644
index 000000000..b8f42b43e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent
new file mode 100644
index 000000000..ab1d696dd
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.xml
new file mode 100644
index 000000000..50416edaa
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.xml
@@ -0,0 +1,5 @@
+
+
+]>
+X&e;Z
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent
new file mode 100644
index 000000000..c6ca61f9c
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.xml
new file mode 100644
index 000000000..565f9475a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.xml
@@ -0,0 +1,5 @@
+
+
+]>
+X&e;Z
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.ent
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.xml
new file mode 100644
index 000000000..8119aa063
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.ent
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.xml
new file mode 100644
index 000000000..5c19ba2c1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.ent
new file mode 100644
index 000000000..cf7711b63
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.ent
@@ -0,0 +1 @@
+xyzzy
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.xml
new file mode 100644
index 000000000..c43795cbe
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.ent
new file mode 100644
index 000000000..8eb1fb9c4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.ent
@@ -0,0 +1 @@
+&e4;
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.xml
new file mode 100644
index 000000000..42d538f64
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+]>
+&e1;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.ent
new file mode 100644
index 000000000..7f25c502d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.ent
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.xml
new file mode 100644
index 000000000..e7f221fba
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.xml
@@ -0,0 +1,10 @@
+
+
+
+
+]>
+&x;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent
new file mode 100644
index 000000000..470fd6fe4
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.xml
new file mode 100644
index 000000000..6b068d739
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/001.xml
new file mode 100644
index 000000000..0a7acf8eb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/001.xml
@@ -0,0 +1 @@
+Data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/002.xml
new file mode 100644
index 000000000..d4a445e55
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/002.xml
@@ -0,0 +1 @@
+Data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/003.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/004.xml
new file mode 100644
index 000000000..0a7acf8eb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/004.xml
@@ -0,0 +1 @@
+Data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/005.xml
new file mode 100644
index 000000000..6e293aa70
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/005.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/006.xml
new file mode 100644
index 000000000..04b6fc82e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/006.xml
@@ -0,0 +1 @@
+Data
More data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/007.xml
new file mode 100644
index 000000000..ab2a74c9d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/007.xml
@@ -0,0 +1 @@
+XYZ
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/008.xml
new file mode 100644
index 000000000..ab2a74c9d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/008.xml
@@ -0,0 +1 @@
+XYZ
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/009.xml
new file mode 100644
index 000000000..a79dff65f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/009.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/010.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/010.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/011.xml
new file mode 100644
index 000000000..bf275adb2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/011.xml
@@ -0,0 +1 @@
+xyzzy
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/012.xml
new file mode 100644
index 000000000..81a251cb4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/012.xml
@@ -0,0 +1 @@
+(e5)
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/013.xml
new file mode 100644
index 000000000..524d94ee6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/013.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/014.xml
new file mode 100644
index 000000000..71c6dc3e8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/014.xml
@@ -0,0 +1 @@
+data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.ent
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.xml
new file mode 100644
index 000000000..8419329c1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.ent
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.xml
new file mode 100644
index 000000000..f497ac870
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-1.ent
new file mode 100644
index 000000000..f7af6e8d9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-1.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-2.ent
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003.xml
new file mode 100644
index 000000000..465dafeac
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-1.ent
new file mode 100644
index 000000000..a988ade92
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-1.ent
@@ -0,0 +1,4 @@
+
+
+
+%e1;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-2.ent
new file mode 100644
index 000000000..f2ed894b2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-2.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004.xml
new file mode 100644
index 000000000..95c9a92fc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-1.ent
new file mode 100644
index 000000000..6e224b56c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-1.ent
@@ -0,0 +1,3 @@
+
+
+%e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-2.ent
new file mode 100644
index 000000000..8611eaf9d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-2.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005.xml
new file mode 100644
index 000000000..9b87d9e57
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.ent
new file mode 100644
index 000000000..c9f9cfe8f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.xml
new file mode 100644
index 000000000..3b9f3d348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.ent
new file mode 100644
index 000000000..a26a45e7e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.xml
new file mode 100644
index 000000000..1c5bc80ef
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.ent
new file mode 100644
index 000000000..a26a45e7e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.xml
new file mode 100644
index 000000000..013856257
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.ent
new file mode 100644
index 000000000..a26a45e7e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.xml
new file mode 100644
index 000000000..a0008557d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.ent
new file mode 100644
index 000000000..e4c75bba2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.xml
new file mode 100644
index 000000000..0cbf0933a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.ent
new file mode 100644
index 000000000..a26a45e7e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.xml
new file mode 100644
index 000000000..a105cc5e1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.xml
@@ -0,0 +1,5 @@
+
+%e;
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.ent
new file mode 100644
index 000000000..2714b330a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.xml
new file mode 100644
index 000000000..832359c55
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.xml
@@ -0,0 +1,5 @@
+
+%e;
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.ent
new file mode 100644
index 000000000..0fce0544d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.ent
@@ -0,0 +1,4 @@
+
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.xml
new file mode 100644
index 000000000..0430cc9bb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.ent
new file mode 100644
index 000000000..827e12e12
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.ent
@@ -0,0 +1,4 @@
+
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.xml
new file mode 100644
index 000000000..8ce35a320
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.ent
new file mode 100644
index 000000000..9089b1c62
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.ent
@@ -0,0 +1,5 @@
+
+
+]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.xml
new file mode 100644
index 000000000..2e0103e47
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.ent
new file mode 100644
index 000000000..7a11d00c4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.ent
@@ -0,0 +1,4 @@
+
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.xml
new file mode 100644
index 000000000..4340c9e34
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.ent
new file mode 100644
index 000000000..6718e5cda
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.ent
@@ -0,0 +1,3 @@
+
+">
+%e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.xml
new file mode 100644
index 000000000..e2d218a05
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.ent
new file mode 100644
index 000000000..ecaa1354a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.xml
new file mode 100644
index 000000000..e27f48b77
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.ent
new file mode 100644
index 000000000..7d56007c9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.xml
new file mode 100644
index 000000000..f9e9301dc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.ent
new file mode 100644
index 000000000..c681b5062
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.xml
new file mode 100644
index 000000000..e009e388a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.ent
new file mode 100644
index 000000000..22d71341a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.xml
new file mode 100644
index 000000000..77789b677
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.ent
new file mode 100644
index 000000000..13bab791f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.ent
@@ -0,0 +1,3 @@
+
+
+ ]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.xml
new file mode 100644
index 000000000..62bad5d74
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.ent
new file mode 100644
index 000000000..4498d84cf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.ent
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.xml
new file mode 100644
index 000000000..2a8d4d9e2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.ent
new file mode 100644
index 000000000..02c1878cc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.ent
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.xml
new file mode 100644
index 000000000..2121cae14
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.ent
new file mode 100644
index 000000000..d0ee12475
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.ent
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.xml
new file mode 100644
index 000000000..8d5bf3ed0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.ent
new file mode 100644
index 000000000..1e356981a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.xml
new file mode 100644
index 000000000..2b320cc7a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.xml
@@ -0,0 +1,7 @@
+
+
+%e;
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.ent
new file mode 100644
index 000000000..36a54662b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.xml
new file mode 100644
index 000000000..cab657fee
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.ent
new file mode 100644
index 000000000..0b8426324
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.ent
@@ -0,0 +1,2 @@
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.xml
new file mode 100644
index 000000000..70a6cf244
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.ent
new file mode 100644
index 000000000..6ba25e769
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.ent
@@ -0,0 +1,3 @@
+
+]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.xml
new file mode 100644
index 000000000..4e69c414d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.ent
new file mode 100644
index 000000000..f623441fe
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.xml
new file mode 100644
index 000000000..376e84c6d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-1.ent
new file mode 100644
index 000000000..8fd57b7cd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-1.ent
@@ -0,0 +1,3 @@
+
+
+">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-2.ent
new file mode 100644
index 000000000..8611eaf9d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-2.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031.xml
new file mode 100644
index 000000000..5a94354a8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031.xml
@@ -0,0 +1,2 @@
+
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/001.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/002.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/002.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/003.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/004.xml
new file mode 100644
index 000000000..bdc39e222
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/004.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/005.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/005.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/006.xml
new file mode 100644
index 000000000..d07627d7a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/006.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/007.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/007.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/008.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/008.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/009.xml
new file mode 100644
index 000000000..7293fb63d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/009.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/010.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/010.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/011.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/011.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/012.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/012.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/013.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/013.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/014.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/014.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/015.xml
new file mode 100644
index 000000000..131a32fe6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/015.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/016.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/016.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/017.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/017.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/018.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/018.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/019.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/019.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/020.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/020.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/021.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/021.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/022.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/022.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/023.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/023.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/024.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/024.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/025.xml
new file mode 100644
index 000000000..eb3f9674e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/025.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/026.xml
new file mode 100644
index 000000000..71c02026e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/026.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/027.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/027.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/028.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/028.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/029.xml
new file mode 100644
index 000000000..7ac8b2b89
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/029.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/030.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/030.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/031.xml
new file mode 100644
index 000000000..03a6c3f9c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/031.xml
@@ -0,0 +1 @@
+<!ATTLIST doc a1 CDATA "v1">
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/001.xml
new file mode 100644
index 000000000..d1e11b7bf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/001.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/002.xml
new file mode 100644
index 000000000..671db91ed
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/002.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/003.xml
new file mode 100644
index 000000000..81302041f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/003.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/004.xml
new file mode 100644
index 000000000..c805282cc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/004.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/005.xml
new file mode 100644
index 000000000..9b203e7b0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/005.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/006.xml
new file mode 100644
index 000000000..13c947796
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/006.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/007.xml
new file mode 100644
index 000000000..1bb7d3827
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/007.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/008.xml
new file mode 100644
index 000000000..e4b9ab79d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/008.xml
@@ -0,0 +1,4 @@
+
+]>
+&<>"'
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/009.xml
new file mode 100644
index 000000000..1fbdc3084
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/009.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/010.xml
new file mode 100644
index 000000000..a964d2837
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/010.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/011.xml
new file mode 100644
index 000000000..8f99e5f5f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/011.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/012.xml
new file mode 100644
index 000000000..7c07c8858
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/012.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/013.xml
new file mode 100644
index 000000000..1cf6401cc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/013.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/014.xml
new file mode 100644
index 000000000..244ec800e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/014.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/015.xml
new file mode 100644
index 000000000..011275b00
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/015.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/016.xml
new file mode 100644
index 000000000..d863f851d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/016.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/017.xml
new file mode 100644
index 000000000..3ace15b4c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/017.xml
@@ -0,0 +1,4 @@
+
+]>
+ ?>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/018.xml
new file mode 100644
index 000000000..5b3bf069c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/018.xml
@@ -0,0 +1,4 @@
+
+]>
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/019.xml
new file mode 100644
index 000000000..1e935d440
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/019.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/020.xml
new file mode 100644
index 000000000..e8525e6ef
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/020.xml
@@ -0,0 +1,4 @@
+
+]>
+]]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/021.xml
new file mode 100644
index 000000000..6923ee2c6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/021.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/022.xml
new file mode 100644
index 000000000..a510a7f75
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/022.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/023.xml
new file mode 100644
index 000000000..835e909aa
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/023.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/024.xml
new file mode 100644
index 000000000..9ba2f0edd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/024.xml
@@ -0,0 +1,6 @@
+
+
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/025.xml
new file mode 100644
index 000000000..6c78b2eb2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/025.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/026.xml
new file mode 100644
index 000000000..4d7139614
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/026.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/027.xml
new file mode 100644
index 000000000..dcd3a9a3f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/027.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/028.xml
new file mode 100644
index 000000000..24e4ec064
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/028.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/029.xml
new file mode 100644
index 000000000..70e9ffb41
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/029.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/030.xml
new file mode 100644
index 000000000..a1711550e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/030.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/031.xml
new file mode 100644
index 000000000..d97d13707
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/031.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/032.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/032.xml
new file mode 100644
index 000000000..57ceada66
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/032.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/033.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/033.xml
new file mode 100644
index 000000000..e3ce40ed3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/033.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/034.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/034.xml
new file mode 100644
index 000000000..832e9d996
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/034.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/035.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/035.xml
new file mode 100644
index 000000000..a3500fdea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/035.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/036.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/036.xml
new file mode 100644
index 000000000..ceacd4d0e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/036.xml
@@ -0,0 +1,5 @@
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/037.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/037.xml
new file mode 100644
index 000000000..e3add8eca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/037.xml
@@ -0,0 +1,6 @@
+
+]>
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/038.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/038.xml
new file mode 100644
index 000000000..087662d78
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/038.xml
@@ -0,0 +1,6 @@
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/039.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/039.xml
new file mode 100644
index 000000000..84c5b9302
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/039.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/040.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/040.xml
new file mode 100644
index 000000000..4482f2cc0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/040.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/041.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/041.xml
new file mode 100644
index 000000000..80bb2dad8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/041.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/042.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/042.xml
new file mode 100644
index 000000000..5cb4bbe99
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/042.xml
@@ -0,0 +1,4 @@
+
+]>
+A
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/043.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/043.xml
new file mode 100644
index 000000000..4774e5a7b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/043.xml
@@ -0,0 +1,6 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/044.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/044.xml
new file mode 100644
index 000000000..8321a14ee
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/044.xml
@@ -0,0 +1,10 @@
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/045.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/045.xml
new file mode 100644
index 000000000..2d70b3214
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/045.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/046.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/046.xml
new file mode 100644
index 000000000..79e039b65
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/046.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/047.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/047.xml
new file mode 100644
index 000000000..6dd4ddf2a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/047.xml
@@ -0,0 +1,5 @@
+
+]>
+X
+Y
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/048.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/048.xml
new file mode 100644
index 000000000..e0c6caf38
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/048.xml
@@ -0,0 +1,4 @@
+
+]>
+]
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml
new file mode 100644
index 000000000..8cec20d7d
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml
new file mode 100644
index 000000000..00e7a78c2
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml
new file mode 100644
index 000000000..1202e5037
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/052.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/052.xml
new file mode 100644
index 000000000..69c990296
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/052.xml
@@ -0,0 +1,4 @@
+
+]>
+ðô¿½
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/053.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/053.xml
new file mode 100644
index 000000000..d8718a478
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/053.xml
@@ -0,0 +1,6 @@
+">
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/054.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/054.xml
new file mode 100644
index 000000000..638693bca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/054.xml
@@ -0,0 +1,10 @@
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/055.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/055.xml
new file mode 100644
index 000000000..a1a2a7a78
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/055.xml
@@ -0,0 +1,5 @@
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/056.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/056.xml
new file mode 100644
index 000000000..1aa08131a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/056.xml
@@ -0,0 +1,4 @@
+
+]>
+A
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/057.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/057.xml
new file mode 100644
index 000000000..796ec2d63
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/057.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/058.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/058.xml
new file mode 100644
index 000000000..f0830bf0d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/058.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/059.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/059.xml
new file mode 100644
index 000000000..07604ef6c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/059.xml
@@ -0,0 +1,10 @@
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/060.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/060.xml
new file mode 100644
index 000000000..fc20976ee
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/060.xml
@@ -0,0 +1,4 @@
+
+]>
+X
Y
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/061.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/061.xml
new file mode 100644
index 000000000..65f6d4d12
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/061.xml
@@ -0,0 +1,4 @@
+
+]>
+£
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/062.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/062.xml
new file mode 100644
index 000000000..5f4aab778
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/062.xml
@@ -0,0 +1,4 @@
+
+]>
+เจมสà¹
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/063.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/063.xml
new file mode 100644
index 000000000..a6dcdc628
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/063.xml
@@ -0,0 +1,4 @@
+
+]>
+<à¹à¸à¸¡à¸ªà¹>à¹à¸à¸¡à¸ªà¹>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/064.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/064.xml
new file mode 100644
index 000000000..c59acf874
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/064.xml
@@ -0,0 +1,4 @@
+
+]>
+𐀀
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/065.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/065.xml
new file mode 100644
index 000000000..1156e6d10
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/065.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/066.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/066.xml
new file mode 100644
index 000000000..f332ffcee
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/066.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/067.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/067.xml
new file mode 100644
index 000000000..470ee6390
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/067.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/068.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/068.xml
new file mode 100644
index 000000000..e1f87a9f3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/068.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/069.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/069.xml
new file mode 100644
index 000000000..8f4c458ea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/069.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/070.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/070.xml
new file mode 100644
index 000000000..61a6d285b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/070.xml
@@ -0,0 +1,5 @@
+">
+%e;
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/071.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/071.xml
new file mode 100644
index 000000000..b07de8cbc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/071.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/072.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/072.xml
new file mode 100644
index 000000000..68d5170b4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/072.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/073.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/073.xml
new file mode 100644
index 000000000..8466a10f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/073.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/074.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/074.xml
new file mode 100644
index 000000000..d88455faf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/074.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/075.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/075.xml
new file mode 100644
index 000000000..fd7310114
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/075.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/076.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/076.xml
new file mode 100644
index 000000000..7f3d03783
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/076.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/077.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/077.xml
new file mode 100644
index 000000000..fb7b9faae
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/077.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/078.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/078.xml
new file mode 100644
index 000000000..6f8ce371b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/078.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/079.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/079.xml
new file mode 100644
index 000000000..b647d0ddc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/079.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/080.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/080.xml
new file mode 100644
index 000000000..e64474642
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/080.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/081.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/081.xml
new file mode 100644
index 000000000..e17bb751d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/081.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/082.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/082.xml
new file mode 100644
index 000000000..055e0c28a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/082.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/083.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/083.xml
new file mode 100644
index 000000000..1451165db
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/083.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/084.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/084.xml
new file mode 100644
index 000000000..c36bba29d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/084.xml
@@ -0,0 +1 @@
+]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/085.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/085.xml
new file mode 100644
index 000000000..d173b8b17
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/085.xml
@@ -0,0 +1,6 @@
+
+">
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/086.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/086.xml
new file mode 100644
index 000000000..1f82c22d9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/086.xml
@@ -0,0 +1,6 @@
+
+
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/087.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/087.xml
new file mode 100644
index 000000000..d3c7e3996
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/087.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/088.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/088.xml
new file mode 100644
index 000000000..7bd0e81ca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/088.xml
@@ -0,0 +1,5 @@
+
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/089.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/089.xml
new file mode 100644
index 000000000..e144c758d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/089.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/090.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/090.xml
new file mode 100644
index 000000000..f349eefbc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/090.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/091.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/091.xml
new file mode 100644
index 000000000..a779ce85c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/091.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/092.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/092.xml
new file mode 100644
index 000000000..4e8eb6c47
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/092.xml
@@ -0,0 +1,10 @@
+
+
+]>
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/093.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/093.xml
new file mode 100644
index 000000000..300578eb5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/093.xml
@@ -0,0 +1,7 @@
+
+]>
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/094.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/094.xml
new file mode 100644
index 000000000..09045d9ed
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/094.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/095.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/095.xml
new file mode 100644
index 000000000..f6e12875e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/095.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/096.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/096.xml
new file mode 100644
index 000000000..5fd635dc4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/096.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.ent
new file mode 100644
index 000000000..8bb4305eb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.xml
new file mode 100644
index 000000000..74b636fe2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.xml
@@ -0,0 +1,8 @@
+
+
+
+%e;
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/098.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/098.xml
new file mode 100644
index 000000000..881de4ed8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/098.xml
@@ -0,0 +1,5 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/099.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/099.xml
new file mode 100644
index 000000000..64db61f02
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/099.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/100.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/100.xml
new file mode 100644
index 000000000..033855e6a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/100.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/101.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/101.xml
new file mode 100644
index 000000000..efd9efe66
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/101.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/102.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/102.xml
new file mode 100644
index 000000000..e034d048d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/102.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/103.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/103.xml
new file mode 100644
index 000000000..6b21a13f8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/103.xml
@@ -0,0 +1,4 @@
+
+]>
+<doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/104.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/104.xml
new file mode 100644
index 000000000..6469bc663
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/104.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/105.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/105.xml
new file mode 100644
index 000000000..3acc97c97
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/105.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/106.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/106.xml
new file mode 100644
index 000000000..4f3e8c805
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/106.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/107.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/107.xml
new file mode 100644
index 000000000..2a58153ca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/107.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/108.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/108.xml
new file mode 100644
index 000000000..938f28723
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/108.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/109.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/109.xml
new file mode 100644
index 000000000..dcf268689
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/109.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/110.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/110.xml
new file mode 100644
index 000000000..c6a3a873e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/110.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/111.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/111.xml
new file mode 100644
index 000000000..b4cdca602
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/111.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/112.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/112.xml
new file mode 100644
index 000000000..7924e6713
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/112.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/113.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/113.xml
new file mode 100644
index 000000000..51d2e6d42
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/113.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/114.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/114.xml
new file mode 100644
index 000000000..f706b1ddb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/114.xml
@@ -0,0 +1,5 @@
+
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/115.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/115.xml
new file mode 100644
index 000000000..cacac3316
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/115.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&e1;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/116.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/116.xml
new file mode 100644
index 000000000..7a174cf35
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/116.xml
@@ -0,0 +1,5 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/117.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/117.xml
new file mode 100644
index 000000000..23c84068f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/117.xml
@@ -0,0 +1,5 @@
+
+
+]>
+]
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/118.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/118.xml
new file mode 100644
index 000000000..068db6329
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/118.xml
@@ -0,0 +1,5 @@
+
+
+]>
+]
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/119.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/119.xml
new file mode 100644
index 000000000..dafff48eb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/119.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/001.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/002.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/002.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/003.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/004.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/004.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/005.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/005.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/006.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/006.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/007.xml
new file mode 100644
index 000000000..97cf3e3b8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/007.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/008.xml
new file mode 100644
index 000000000..3ea232c21
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/008.xml
@@ -0,0 +1 @@
+&<>"'
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/009.xml
new file mode 100644
index 000000000..97cf3e3b8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/009.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/010.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/010.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/011.xml
new file mode 100644
index 000000000..7293fb63d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/011.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/012.xml
new file mode 100644
index 000000000..5a0c9831a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/012.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/013.xml
new file mode 100644
index 000000000..c9c7ec5da
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/013.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/014.xml
new file mode 100644
index 000000000..ac6b28f97
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/014.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/015.xml
new file mode 100644
index 000000000..8e216eb99
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/015.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/016.xml
new file mode 100644
index 000000000..4fc76928b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/016.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/017.xml
new file mode 100644
index 000000000..3b9a2f8d4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/017.xml
@@ -0,0 +1 @@
+ ?>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/018.xml
new file mode 100644
index 000000000..a5471011d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/018.xml
@@ -0,0 +1 @@
+<foo>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/019.xml
new file mode 100644
index 000000000..05d4e2fcf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/019.xml
@@ -0,0 +1 @@
+<&
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/020.xml
new file mode 100644
index 000000000..95ae08a12
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/020.xml
@@ -0,0 +1 @@
+<&]>]
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/021.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/021.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/022.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/022.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/023.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/023.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/024.xml
new file mode 100644
index 000000000..a9aa2074f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/024.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/025.xml
new file mode 100644
index 000000000..de0f56602
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/025.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/026.xml
new file mode 100644
index 000000000..de0f56602
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/026.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/027.xml
new file mode 100644
index 000000000..de0f56602
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/027.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/028.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/028.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/029.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/029.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/030.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/030.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/031.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/031.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/032.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/032.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/032.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/033.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/033.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/033.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/034.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/034.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/034.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/035.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/035.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/035.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/036.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/036.xml
new file mode 100644
index 000000000..2bcfb06cf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/036.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/037.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/037.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/037.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/038.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/038.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/038.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/039.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/039.xml
new file mode 100644
index 000000000..82d117d49
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/039.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/040.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/040.xml
new file mode 100644
index 000000000..d79cfe149
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/040.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/041.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/041.xml
new file mode 100644
index 000000000..6f2cd5832
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/041.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/042.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/042.xml
new file mode 100644
index 000000000..f683039a8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/042.xml
@@ -0,0 +1 @@
+A
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/043.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/043.xml
new file mode 100644
index 000000000..e162b7650
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/043.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/044.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/044.xml
new file mode 100644
index 000000000..78028b704
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/044.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/045.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/045.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/045.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/046.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/046.xml
new file mode 100644
index 000000000..7293fb63d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/046.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/047.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/047.xml
new file mode 100644
index 000000000..b327ebd67
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/047.xml
@@ -0,0 +1 @@
+X
Y
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/048.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/048.xml
new file mode 100644
index 000000000..ced7d0271
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/048.xml
@@ -0,0 +1 @@
+]
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/049.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/049.xml
new file mode 100644
index 000000000..7cc53f9ea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/049.xml
@@ -0,0 +1 @@
+£
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/050.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/050.xml
new file mode 100644
index 000000000..33703c792
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/050.xml
@@ -0,0 +1 @@
+à¹à¸à¸¡à¸ªà¹
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/051.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/051.xml
new file mode 100644
index 000000000..cfeb5a536
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/051.xml
@@ -0,0 +1 @@
+<à¹à¸à¸¡à¸ªà¹>à¹à¸à¸¡à¸ªà¹>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/052.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/052.xml
new file mode 100644
index 000000000..f5a048479
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/052.xml
@@ -0,0 +1 @@
+ðô¿½
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/053.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/053.xml
new file mode 100644
index 000000000..c4083843d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/053.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/054.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/054.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/054.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/055.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/055.xml
new file mode 100644
index 000000000..82d117d49
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/055.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/056.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/056.xml
new file mode 100644
index 000000000..f683039a8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/056.xml
@@ -0,0 +1 @@
+A
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/057.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/057.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/057.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/058.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/058.xml
new file mode 100644
index 000000000..f898cc8c9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/058.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/059.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/059.xml
new file mode 100644
index 000000000..78028b704
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/059.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/060.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/060.xml
new file mode 100644
index 000000000..b327ebd67
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/060.xml
@@ -0,0 +1 @@
+X
Y
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/061.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/061.xml
new file mode 100644
index 000000000..7cc53f9ea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/061.xml
@@ -0,0 +1 @@
+£
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/062.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/062.xml
new file mode 100644
index 000000000..33703c792
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/062.xml
@@ -0,0 +1 @@
+à¹à¸à¸¡à¸ªà¹
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/063.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/063.xml
new file mode 100644
index 000000000..cfeb5a536
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/063.xml
@@ -0,0 +1 @@
+<à¹à¸à¸¡à¸ªà¹>à¹à¸à¸¡à¸ªà¹>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/064.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/064.xml
new file mode 100644
index 000000000..f5a048479
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/064.xml
@@ -0,0 +1 @@
+ðô¿½
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/065.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/065.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/065.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/066.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/066.xml
new file mode 100644
index 000000000..7597d31bf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/066.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/067.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/067.xml
new file mode 100644
index 000000000..4bbdad45e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/067.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/068.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/068.xml
new file mode 100644
index 000000000..4bbdad45e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/068.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/069.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/069.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/069.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/070.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/070.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/070.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/071.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/071.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/071.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/072.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/072.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/072.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/073.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/073.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/073.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/074.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/074.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/074.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/075.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/075.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/075.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/076.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/076.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/076.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/077.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/077.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/077.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/078.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/078.xml
new file mode 100644
index 000000000..fcab0cd7f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/078.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/079.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/079.xml
new file mode 100644
index 000000000..fcab0cd7f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/079.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/080.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/080.xml
new file mode 100644
index 000000000..fcab0cd7f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/080.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/081.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/081.xml
new file mode 100644
index 000000000..e356e7e4d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/081.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/082.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/082.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/082.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/083.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/083.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/083.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/084.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/084.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/084.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/085.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/085.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/085.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/086.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/086.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/086.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/087.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/087.xml
new file mode 100644
index 000000000..a9aa2074f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/087.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/088.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/088.xml
new file mode 100644
index 000000000..a5471011d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/088.xml
@@ -0,0 +1 @@
+<foo>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/089.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/089.xml
new file mode 100644
index 000000000..e01d86e8d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/089.xml
@@ -0,0 +1 @@
+ðô¿½ô¿¿
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/090.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/090.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/090.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/091.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/091.xml
new file mode 100644
index 000000000..dd3bbedf7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/091.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/092.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/092.xml
new file mode 100644
index 000000000..87269f79d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/092.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/093.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/093.xml
new file mode 100644
index 000000000..631bfde91
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/093.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/094.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/094.xml
new file mode 100644
index 000000000..636ab4729
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/094.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/095.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/095.xml
new file mode 100644
index 000000000..a20706ee0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/095.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/096.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/096.xml
new file mode 100644
index 000000000..f898cc8c9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/096.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/097.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/097.xml
new file mode 100644
index 000000000..e05cfe6c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/097.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/098.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/098.xml
new file mode 100644
index 000000000..f6408de9b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/098.xml
@@ -0,0 +1,2 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/099.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/099.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/099.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/100.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/100.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/100.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/101.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/101.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/101.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/102.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/102.xml
new file mode 100644
index 000000000..6e66b8da2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/102.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/103.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/103.xml
new file mode 100644
index 000000000..96495d45c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/103.xml
@@ -0,0 +1 @@
+<doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/104.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/104.xml
new file mode 100644
index 000000000..cc3def333
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/104.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/105.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/105.xml
new file mode 100644
index 000000000..5aed3d613
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/105.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/106.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/106.xml
new file mode 100644
index 000000000..1197d2ff9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/106.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/107.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/107.xml
new file mode 100644
index 000000000..288f23cdf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/107.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/108.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/108.xml
new file mode 100644
index 000000000..cc3def333
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/108.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/109.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/109.xml
new file mode 100644
index 000000000..c43bdf9b9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/109.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/110.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/110.xml
new file mode 100644
index 000000000..a92237b4e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/110.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/111.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/111.xml
new file mode 100644
index 000000000..cc3def333
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/111.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/112.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/112.xml
new file mode 100644
index 000000000..c82f47bca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/112.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/113.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/113.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/113.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/114.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/114.xml
new file mode 100644
index 000000000..8e0722aba
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/114.xml
@@ -0,0 +1 @@
+&foo;
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/115.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/115.xml
new file mode 100644
index 000000000..682b8140e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/115.xml
@@ -0,0 +1 @@
+v
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/116.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/116.xml
new file mode 100644
index 000000000..a79dff65f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/116.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/117.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/117.xml
new file mode 100644
index 000000000..ced7d0271
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/117.xml
@@ -0,0 +1 @@
+]
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/118.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/118.xml
new file mode 100644
index 000000000..31e37a939
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/118.xml
@@ -0,0 +1 @@
+]]
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/119.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/119.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/119.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/001.xml
new file mode 100644
index 000000000..b13acd2b8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/001.xml
@@ -0,0 +1,61 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/002.xml
new file mode 100644
index 000000000..759207aa2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/002.xml
@@ -0,0 +1,39 @@
+
+
+
+
+
+]>
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/003.xml
new file mode 100644
index 000000000..1335a779d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/003.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '>
+]>
+
+&elinstance;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/004.xml
new file mode 100644
index 000000000..7cad5de26
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/004.xml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+'>
+]>
+
+&elinstance;
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/005.xml
new file mode 100644
index 000000000..7f05469e7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/005.xml
@@ -0,0 +1,24 @@
+
+
+
+
+
+]>
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/006.xml
new file mode 100644
index 000000000..8f1287cb2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/006.xml
@@ -0,0 +1,22 @@
+
+
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/007.xml
new file mode 100644
index 000000000..9cecac24b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/007.xml
@@ -0,0 +1,26 @@
+
+
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/INDEX
new file mode 100644
index 000000000..e8fccb052
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/INDEX
@@ -0,0 +1,12 @@
+001.xml tests whether additional white space in attribute value
+ is removed during normalization for every att type but
+ not for CDATA
+002.xml tests whether TABs, CRs, LFs, and CRLFs are converted
+ to spaces (only for CDATA, NMTOKEN, NMTOKENS)
+003.xml similar to 002.xml, but the attribute values occur
+ in internal entities
+004.xml tests whether CRLF normalization happens only once
+005.xml tests whether spaces, TABs, LFs, CRs, and CRLFs are correctly
+ processed if they are written as character references
+006.xml tests whether normalization is done before #FIXED comparison
+007.xml tests whether normalization is done before #FIXED comparison
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/001.xml
new file mode 100644
index 000000000..818ca6e52
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/002.xml
new file mode 100644
index 000000000..5167ac37d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/002.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/003.xml
new file mode 100644
index 000000000..782bcfbeb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/004.xml
new file mode 100644
index 000000000..1b1c17198
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/004.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/005.xml
new file mode 100644
index 000000000..1f722da07
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/005.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/006.xml
new file mode 100644
index 000000000..3aefc89f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/006.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/007.xml
new file mode 100644
index 000000000..3aefc89f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/007.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/001.xml
new file mode 100644
index 000000000..4523f1b9f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/001.xml
@@ -0,0 +1,10 @@
+
+
+
+
+]>
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/002+.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/002+.xml
new file mode 100644
index 000000000..81a22e2d3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/002+.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/003.xml
new file mode 100644
index 000000000..45cd7eba7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/003.xml
@@ -0,0 +1,13 @@
+
+
+]>
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/INDEX
new file mode 100644
index 000000000..56368c527
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/INDEX
@@ -0,0 +1,12 @@
+This directory contains real regression tests, i.e. it is tested whether
+reported bugs have been fixed.
+
+001.xml 2000-08-26: Haruo's single quote bug. Attvalues delimited
+ by single quotes did not work for the UTF-8 lexer.
+002+.xml 2000-08-26: Haruo's file-names-are-not-URLs bug. from_file
+ interpreted the file name as URL-encoded string. "002+.xml"
+ because the "+" must not be decoded as space.
+003.xml 2000-08-26: Alain's bug that data nodes must not be merged
+ where PI nodes are created. In the "comments" directory
+ there is another test for the case that comments delimit
+ data material
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/001.xml
new file mode 100644
index 000000000..e9d83b3e6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/002+.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/002+.xml
new file mode 100644
index 000000000..794447b8f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/002+.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/003.xml
new file mode 100644
index 000000000..22bd71013
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/001.xml
new file mode 100644
index 000000000..fcb4a00af
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/001.xml
@@ -0,0 +1,13 @@
+
+
+]>
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/INDEX
new file mode 100644
index 000000000..0df471ff0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/INDEX
@@ -0,0 +1 @@
+001 Checks whether enable_comment_nodes works
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/out/001.xml
new file mode 100644
index 000000000..f03911dd5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.ent
new file mode 100644
index 000000000..fde4af2e1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.ent
@@ -0,0 +1,2 @@
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.xml
new file mode 100644
index 000000000..02ef0bb08
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.ent
new file mode 100644
index 000000000..19c02066b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.ent
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.xml
new file mode 100644
index 000000000..0c5372cbb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.ent
new file mode 100644
index 000000000..90bda5f94
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.ent
@@ -0,0 +1,3 @@
+
+]]>
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.xml
new file mode 100644
index 000000000..c4b33e49a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.ent
new file mode 100644
index 000000000..8ab56971b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.ent
@@ -0,0 +1,3 @@
+
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.xml
new file mode 100644
index 000000000..740d17301
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.ent
new file mode 100644
index 000000000..bd26a5466
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.ent
@@ -0,0 +1,4 @@
+
+
+]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.xml
new file mode 100644
index 000000000..aa3a8f906
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.ent
new file mode 100644
index 000000000..7ff3cdd41
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.ent
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.xml
new file mode 100644
index 000000000..bd2ee3261
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.ent
new file mode 100644
index 000000000..180c6b0da
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.ent
@@ -0,0 +1,4 @@
+
+">
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.xml
new file mode 100644
index 000000000..1c5bc80ef
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.ent
new file mode 100644
index 000000000..e36d707a2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.ent
@@ -0,0 +1,4 @@
+
+">]]>
+'>]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.xml
new file mode 100644
index 000000000..c140c0a2a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.ent
new file mode 100644
index 000000000..617d3d9b5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.ent
@@ -0,0 +1,3 @@
+
+ -->]]>
+ -->]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.xml
new file mode 100644
index 000000000..c75bbb65a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.ent
new file mode 100644
index 000000000..9a72698ad
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.ent
@@ -0,0 +1,5 @@
+
+]]>
+]]>
+]]>
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.xml
new file mode 100644
index 000000000..93b5cf6cf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.ent
new file mode 100644
index 000000000..e24aad155
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.ent
@@ -0,0 +1,6 @@
+
+ ]]>
+
+]]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.xml
new file mode 100644
index 000000000..587ab5d10
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/INDEX
new file mode 100644
index 000000000..0360292f7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/INDEX
@@ -0,0 +1,20 @@
+001 IGNORE works:
+002 [precondition for 003] The first ATTLIST declaration for the same
+ attribute counts
+003 INCLUDE works:
+004 IGNORE works: with e="IGNORE"
+005 INCLUDE works: with e="INCLUDE"
+006 IGNORE works: ignoring a section that would
+ be illegal
+007 Within ignored sections references to parameter entities are
+ not resolved.
+ NOTE: You cannot derive this directly from the XML spec. because a
+ precise definition what "ignoring" means is missing. This property
+ is an interpretation of the statement about reliable parsing in
+ section 3.4.
+008 Ignored sections may contain string literals containing "]]>".
+ NOTE: same problem with XML spec as 007
+009 Ignored sections may contain comments containing "]]>".
+ NOTE: same problem with XML spec as 007
+010 Nested conditional sections with outermost IGNORE
+011 Nested conditional sections with outermost INCLUDE
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/001.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/002.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/002.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/003.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/004.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/004.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/005.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/005.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/006.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/006.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/007.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/007.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/008.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/008.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/009.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/009.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/010.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/010.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/011.xml
new file mode 100644
index 000000000..7e8f18348
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/011.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/001.xml
new file mode 100644
index 000000000..091536317
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/001.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/002.xml
new file mode 100644
index 000000000..e993d3ab5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/002.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/003.xml
new file mode 100644
index 000000000..c0bfad4b5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/003.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/004.xml
new file mode 100644
index 000000000..889604db7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/004.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/005.xml
new file mode 100644
index 000000000..609962b82
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/005.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/006.xml
new file mode 100644
index 000000000..265d27201
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/006.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/007.xml
new file mode 100644
index 000000000..56fce5363
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/007.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/INDEX
new file mode 100644
index 000000000..21b68b931
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/INDEX
@@ -0,0 +1,14 @@
+
+
+001.xml Whether it works for undeclared elements
+002.xml Whether it works for undeclared elements with attributes
+003.xml Whether it works for undeclared elements in declarations
+004.xml Whether it works for undeclared root elements
+005.xml Whether it works for undeclared notations
+006.xml Whether it works for undeclared notations which are actually
+ referred to
+
+
+
+007.xml Whether it works
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/001.xml
new file mode 100644
index 000000000..61040a81b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/001.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/002.xml
new file mode 100644
index 000000000..d894fe8d8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/002.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/003.xml
new file mode 100644
index 000000000..61040a81b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/003.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/004.xml
new file mode 100644
index 000000000..61040a81b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/004.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/005.xml
new file mode 100644
index 000000000..7df325ba2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/005.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/006.xml
new file mode 100644
index 000000000..f5d7becdd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/006.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/007.xml
new file mode 100644
index 000000000..8685a8aa0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/007.xml
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/run_canonxml b/helm/DEVEL/pxp/pxp/rtests/canonxml/run_canonxml
new file mode 100755
index 000000000..2c40807dd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/run_canonxml
@@ -0,0 +1,31 @@
+#! /bin/bash
+
+check_dir () {                          # usage: check_dir DIR [test_canonxml options...]
+    dir="$1"
+    shift                               # the remaining "$@" are forwarded to ./test_canonxml
+    xmlfiles=`cd "$dir" && echo *.xml`  # quoted: a DIR containing blanks must not be split
+    for file in $xmlfiles; do           # relies on word splitting of the echoed file list
+        echo -n "File $dir/$file: "
+        ./test_canonxml "$@" "$dir/$file" >out.xml
+        if cmp out.xml "$dir/out/$file"; then   # expected output lives in DIR/out/
+            echo "OK"
+        else
+            echo "NOT OK"
+            read                        # wait for a line on stdin before continuing
+        fi
+    done
+}
+
+check_dir "data_valid/conditional"
+check_dir "data_valid/att_normalization"
+check_dir "data_valid/optional_decls"
+check_dir "data_valid/comments" -comments   # extra arguments are passed on to ./test_canonxml
+check_dir "data_valid/bugfixes"
+
+#check_dir "data_jclark_valid/sa-problems"
+#check_dir "data_jclark_valid/ext-sa-problems"
+check_dir "data_jclark_valid/sa"
+check_dir "data_jclark_valid/not-sa"
+check_dir "data_jclark_valid/ext-sa"
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml b/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml
new file mode 100644
index 000000000..ef83a28fc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml
@@ -0,0 +1,239 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let prerr_error e =   (* report exception e on stderr; not recursive, so no rec *)
+  prerr_endline (string_of_exn e)   (* string_of_exn: presumably from the opened Pxp modules *)
+;;
+
+class warner =   (* object installed as config.warner in parse below *)
+ object
+ method warn w =   (* prints the warning text w on stderr behind a fixed prefix *)
+ prerr_endline ("WARNING: " ^ w)
+ end
+;;
+
+let outbuf = String.create 8192;;   (* NOTE(review): not referenced anywhere else in this file *)
+
+(* Print s on stdout as UTF-8; config.encoding says how s itself is encoded. *)
+let output_utf8 config s =
+  match config.encoding with
+    `Enc_utf8 -> print_string s        (* already UTF-8: copy verbatim *)
+  | `Enc_iso88591 ->                   (* bytes above 127 become two UTF-8 bytes *)
+      let put code = print_char (Char.chr code) in
+      for pos = 0 to String.length s - 1 do
+        let byte = Char.code s.[pos] in
+        if byte < 0x80 then put byte
+        else begin
+          put (0xc0 lor (byte lsr 6));
+          put (0x80 lor (byte land 0x3f))
+        end
+      done
+  | _ -> assert false                  (* no other encoding is handled here *)
+;;
+
+
+let re = Str.regexp "[&<>\"\009\010\013]";;
+
+(* Replace the characters matched by re with their canonical XML references. *)
+let escaped s =
+  Str.global_substitute re
+    (fun _ ->
+       match Str.matched_string s with
+         "&"    -> "&amp;"
+       | "<"    -> "&lt;"
+       | ">"    -> "&gt;"
+       | "\""   -> "&quot;"
+       | "\009" -> "&#9;"
+       | "\010" -> "&#10;"
+       | "\013" -> "&#13;"
+       | _      -> assert false   (* re matches only the seven characters above *)
+    )
+    s
+;;
+
+
+(* Write node n and its whole subtree to stdout in canonical XML form. *)
+let rec output_xml config n =
+  match n # node_type with
+      T_super_root ->
+        n # iter_nodes (output_xml config)
+    | T_pinstr pi_name ->
+        let [ pi ] = n # pinstr pi_name in   (* match fails unless exactly one PI is returned *)
+        output_utf8 config "<?";
+        output_utf8 config (pi # target);
+        output_utf8 config " ";
+        output_utf8 config (pi # value);
+        output_utf8 config "?>";
+    | T_element name ->
+        output_utf8 config "<";
+        output_utf8 config name;
+        let sorted_attnames = Sort.list ( <= ) (n # attribute_names) in   (* attributes in sorted order *)
+        List.iter
+          (fun attname ->
+             match n # attribute attname with
+                 Value v ->
+                   output_utf8 config " ";
+                   output_utf8 config attname;
+                   output_utf8 config "=\"";
+                   output_utf8 config (escaped v);
+                   output_utf8 config "\"";
+               | Valuelist vl ->
+                   let v = String.concat " " vl in   (* list values are joined by single blanks *)
+                   output_utf8 config " ";
+                   output_utf8 config attname;
+                   output_utf8 config "=\"";
+                   output_utf8 config (escaped v);
+                   output_utf8 config "\"";
+               | Implied_value ->
+                   ()                                (* implied attributes are not printed *)
+          )
+          sorted_attnames;
+        output_utf8 config ">";
+        n # iter_nodes (output_xml config);
+        output_utf8 config "</";                     (* explicit end tag, also for childless elements *)
+        output_utf8 config name;
+        output_utf8 config ">";
+    | T_data ->
+        let v = n # data in
+        output_utf8 config (escaped v)
+    | T_comment ->
+        let v =
+          match n # comment with
+              None -> assert false                   (* a comment node without text is unexpected *)
+            | Some x -> x
+        in
+        output_utf8 config ("<!--" ^ v ^ "-->")      (* comment text is written unescaped *)
+    | _ ->
+        assert false
+;;
+
+
+let parse debug wf iso88591 comments filename =   (* parse one file, print its canonical form on stdout *)
+ let spec =
+ let e = new element_impl default_extension in   (* one exemplar shared by all non-data node kinds *)
+ e # keep_always_whitespace_mode;   (* NOTE(review): presumably preserves whitespace-only data; confirm in Pxp_document *)
+ make_spec_from_mapping
+ ~super_root_exemplar: e
+ ~default_pinstr_exemplar: e
+ ~comment_exemplar: e
+ ~data_exemplar: (new data_impl default_extension)
+ ~default_element_exemplar: e
+ ~element_mapping: (Hashtbl.create 1)   (* empty table: no element-specific exemplars *)
+ ()
+ in
+ let config =
+ { default_config with
+ warner = new warner;
+ debugging_mode = debug;
+ enable_pinstr_nodes = true;   (* output_xml handles T_pinstr nodes *)
+ enable_super_root_node = true;   (* output_xml handles T_super_root *)
+ enable_comment_nodes = comments;
+ encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;   (* internal encoding; output_utf8 converts on output *)
+ idref_pass = true;
+ }
+ in
+ try
+ let parse_fn =
+ if wf then parse_wfdocument_entity   (* option -wf: well-formedness check only *)
+ else
+ let index = new hash_index in
+ parse_document_entity
+ ?transform_dtd:None
+ ~id_index:(index :> 'ext index)
+ in
+ let tree =
+ parse_fn
+ config
+ (from_file filename)
+ spec
+ in
+ output_xml config (tree # root)
+ with
+ e ->   (* any exception: record the failure and report it, then return normally *)
+ error_happened := true;
+ prerr_error e
+;;
+
+
+let main() =   (* read the command line and run parse on every file argument *)
+ let debug = ref false in
+ let wf = ref false in
+ let iso88591 = ref false in
+ let comments = ref false in
+ let files = ref [] in
+ Arg.parse
+ [ "-d", Arg.Set debug,
+ " turn debugging mode on";
+ "-wf", Arg.Set wf,
+ " check only on well-formedness";
+ "-iso-8859-1", Arg.Set iso88591,
+ " use ISO-8859-1 as internal encoding instead of UTF-8";
+ "-comments", Arg.Set comments,
+ " output comments, too";
+ ]
+ (fun x -> files := x :: !files)   (* anonymous arguments are file names, collected in reverse *)
+ "
+usage: test_canonxml [options] file ...
+
+List of options:";
+ files := List.rev !files;   (* restore command-line order *)
+ List.iter (parse !debug !wf !iso88591 !comments) !files;
+;;
+
+
+main();
+if !error_happened then exit(1);;   (* exit status 1 when parse recorded an error for any file *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/17 00:51:57 gerd
+ * Added -comments option to test enable_comment_nodes.
+ *
+ * Revision 1.7 2000/08/16 23:44:17 gerd
+ * Updates because of changes of the PXP API.
+ *
+ * Revision 1.6 2000/07/14 14:56:55 gerd
+ * Updated: warner.
+ *
+ * Revision 1.5 2000/07/14 14:17:58 gerd
+ * Updated because of interface changes.
+ *
+ * Revision 1.4 2000/07/09 01:06:20 gerd
+ * Updated.
+ *
+ * Revision 1.3 2000/06/04 20:31:03 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/20 20:34:28 gerd
+ * Changed for UTF-8 support.
+ *
+ * Revision 1.1 2000/04/30 20:13:01 gerd
+ * Initial revision.
+ *
+ * Revision 1.3 1999/11/09 22:27:30 gerd
+ * The program now returns an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2 1999/09/01 23:09:56 gerd
+ * Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1 1999/08/14 22:20:53 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/Makefile b/helm/DEVEL/pxp/pxp/rtests/codewriter/Makefile
new file mode 100644
index 000000000..bacc75ce0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/codewriter/Makefile
@@ -0,0 +1,28 @@
+# make compile: make bytecode executable
+# (this Makefile defines no native-code target)
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+compile: compile.ml
+ ocamlfind ocamlc -g -custom -o compile -package .,str -linkpkg compile.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa sample sample.ml out1 out2
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f compile
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/compile.ml b/helm/DEVEL/pxp/pxp/rtests/codewriter/compile.ml
new file mode 100644
index 000000000..1bd6e2266
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/codewriter/compile.ml
@@ -0,0 +1,131 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let prerr_error e =   (* report exception e on stderr; not recursive, so no rec *)
+  prerr_endline (string_of_exn e)   (* string_of_exn: presumably from the opened Pxp modules *)
+;;
+
+
+class warner =   (* object installed as config.warner in compile below *)
+ object
+ method warn w =   (* prints the warning text w on stderr behind a fixed prefix *)
+ prerr_endline ("WARNING: " ^ w)
+ end
+;;
+
+
+let compile in_filename out_filename print super_root pis comments =   (* parse in_filename, write OCaml code to out_filename *)
+ let spec =
+ let e = new element_impl default_extension in   (* one exemplar shared by all non-data node kinds *)
+ make_spec_from_mapping
+ ~super_root_exemplar: e
+ ~default_pinstr_exemplar: e
+ ~comment_exemplar: e
+ ~data_exemplar: (new data_impl default_extension)
+ ~default_element_exemplar: e
+ ~element_mapping: (Hashtbl.create 1)   (* empty table: no element-specific exemplars *)
+ ()
+ in
+ let config =
+ { default_config with
+ encoding = `Enc_utf8;
+ warner = new warner;
+ enable_super_root_node = super_root;
+ enable_pinstr_nodes = pis;
+ enable_comment_nodes = comments;
+ }
+ in
+ try
+ let tree =
+ parse_document_entity
+ config
+ (from_file in_filename)
+ spec
+ in
+
+ let ch = open_out out_filename in   (* NOTE(review): ch stays open if write_document raises *)
+ Pxp_codewriter.write_document ch tree;
+ output_string ch "(create_document (new Pxp_types.drop_warnings) Pxp_yacc.default_spec) # write (Pxp_types.Out_channel stdout) `Enc_utf8;;\n";   (* footer: the generated program writes the document to stdout *)
+ close_out ch;
+
+ if print then   (* option -print: also write the parsed document to stdout *)
+ tree # write (Out_channel stdout) `Enc_utf8;
+ with
+ e ->   (* any exception: record the failure and report it, then return normally *)
+ error_happened := true;
+ prerr_error e
+;;
+
+
+let main() =   (* read the command line, check that -in and -out were given, run compile *)
+ let in_file = ref "" in   (* empty string means: option not given *)
+ let out_file = ref "" in
+ let print_file = ref false in
+ let super_root = ref false in
+ let pis = ref false in
+ let comments = ref false in
+ Arg.parse
+ [ "-in", (Arg.String (fun s -> in_file := s)),
+ " Set the XML file to read";
+ "-out", (Arg.String (fun s -> out_file := s)),
+ " Set the Ocaml file to write";
+ "-print", (Arg.Set print_file),
+ " Print the XML file in standard form";
+ "-super-root", Arg.Set super_root,
+ " Generate a super root node";
+ "-pis", Arg.Set pis,
+ " Generate wrapper nodes for processing instructions";
+ "-comments", Arg.Set comments,
+ " Generate nodes for comments";
+ ]
+ (fun x -> raise (Arg.Bad "Unexpected argument"))   (* anonymous arguments are rejected *)
+ "
+usage: compile [ options ]
+
+List of options:";
+ if !in_file = "" then begin
+ prerr_endline "No input file specified.";
+ exit 1
+ end;
+ if !out_file = "" then begin
+ prerr_endline "No output file specified.";
+ exit 1
+ end;
+ compile !in_file !out_file !print_file !super_root !pis !comments
+;;
+
+
+main();
+if !error_happened then exit(1);;   (* exit status 1 when compile recorded an error *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:35 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/17 01:20:15 gerd
+ * Update: Also tested whether super root nodes, pinstr nodes
+ * and comment nodes work.
+ * Note: comment nodes are not fully tested yet.
+ *
+ * Revision 1.3 2000/08/16 23:44:19 gerd
+ * Updates because of changes of the PXP API.
+ *
+ * Revision 1.2 2000/07/16 17:54:15 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.1 2000/07/09 00:33:32 gerd
+ * Initial revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/run_codewriter b/helm/DEVEL/pxp/pxp/rtests/codewriter/run_codewriter
new file mode 100755
index 000000000..08e0a428e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/codewriter/run_codewriter
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+./test_codewriter sample001.xml   # run the codewriter test on the bundled sample
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/sample001.xml b/helm/DEVEL/pxp/pxp/rtests/codewriter/sample001.xml
new file mode 100644
index 000000000..4516b71c8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/codewriter/sample001.xml
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+]>
+
+
+
+
+
+
+ This is text!
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/test_codewriter b/helm/DEVEL/pxp/pxp/rtests/codewriter/test_codewriter
new file mode 100755
index 000000000..769b6b944
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/codewriter/test_codewriter
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+set -e   # stop at the first failing command
+
+sample="$1"   # XML file to test
+echo "Testing $sample:"
+./compile -in "$sample" -out "sample.ml" -print -super-root -pis -comments >"out1"   # stdout of -print goes to out1
+echo "- code written to sample.ml, formatted data to out1"
+OCAMLPATH=../.. ocamlfind ocamlc -package . -linkpkg -custom sample.ml -o sample   # build the generated program
+echo "- sample.ml compiled to sample"
+./sample >out2   # the generated program prints the document it rebuilds
+echo "- re-read data written to out2"
+if cmp out1 out2; then   # both printouts must be byte-identical
+ echo "- out1 and out2 are identical! OK"
+else
+ echo "- out1 and out2 differ! FAILURE!"
+ exit 1
+fi
diff --git a/helm/DEVEL/pxp/pxp/rtests/dumpfiles.ml b/helm/DEVEL/pxp/pxp/rtests/dumpfiles.ml
new file mode 100644
index 000000000..d5f9e5071
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/dumpfiles.ml
@@ -0,0 +1,56 @@
+
+
+(* [dump_file name] prints a LaTeX rendering of the file [name] on stdout.
+ *
+ * The file is read in binary mode and shown inside a minipage of 5.5cm
+ * width that starts with a heading containing [name] and ends with a
+ * horizontal rule. Every byte is rendered on its own:
+ *  - HT, LF and CR appear as slanted mnemonics; LF ends the output line,
+ *    and so does a CR that is not immediately followed by LF
+ *  - the remaining bytes below 32 and all bytes from 127 upwards appear
+ *    as slanted two-digit hex codes
+ *  - the blank and a fixed set of punctuation characters are written as
+ *    \symbol{<decimal code>}
+ *  - any other character is printed unchanged
+ * Except after LF and CR, a \linebreak hint follows every rendered byte.
+ *
+ * Note that [name] itself is printed without any escaping.
+ *)
+let dump_file name =
+  (* Load the complete file into memory before anything is printed. *)
+  let data =
+    let ic = open_in_bin name in
+    let size = in_channel_length ic in
+    let buf = String.create size in
+    really_input ic buf 0 size;
+    close_in ic;
+    buf
+  in
+  let size = String.length data in
+
+  (* Prints the rendering of the byte at position [pos] of [data]. *)
+  let render pos =
+    match data.[pos] with
+      '\009' ->
+        print_string "{\\sl HT}\\linebreak[3]"
+    | '\010' ->
+        print_string "{\\sl LF}\\\\\n"
+    | '\013' ->
+        print_string "{\\sl CR}";
+        (* For CR LF the line end is emitted by the LF that follows. *)
+        let lf_follows = pos + 1 < size && data.[pos + 1] = '\010' in
+        if not lf_follows then print_string "\\\\\n"
+    | ' ' ->
+        print_string "\\symbol{32}\\linebreak[3]"
+    | ('\000'..'\008'|'\011'|'\012'|'\014'..'\031'|'\127'..'\255') as c ->
+        Printf.printf "{\\sl (%02x)}\\linebreak[2]" (Char.code c)
+    | ('"'|'#'|'$'|'%'|'&'|'-'|'<'|'>'|'['|'\\'|']'|'^'|'_'|'`'|
+       '{'|'|'|'}'|'~') as c ->
+        Printf.printf "\\symbol{%d}\\linebreak[2]" (Char.code c)
+    | c ->
+        print_char c;
+        print_string "\\linebreak[0]"
+  in
+
+  (* Opening of the minipage and the heading line. *)
+  print_string "\\noindent\\begin{minipage}{5.5cm}\n";
+  print_string ("\\footnotesize\\bf File " ^ name ^ ":\\\\\n");
+  print_string "\\tt{}";
+
+  for pos = 0 to size - 1 do
+    render pos
+  done;
+
+  (* Closing line, rule and end of the minipage. *)
+  print_string "\\mbox{}\\\\\n";
+  print_string "\\rule{5.5cm}{1pt}\n";
+  print_string "\\end{minipage}\n"
+;;
+
+
+(* Main program: writes a complete LaTeX document to stdout which contains,
+ * in a two-column layout, one dump (see dump_file) for every file named
+ * on the command line.
+ *)
+List.iter
+  print_endline
+  [ "\\documentclass[a4paper]{article}";
+    "\\usepackage{multicol}";
+    "\\begin{document}";
+    "\\begin{multicols}{2}" ];
+(* Sys.argv.(0) is the program name, so only the later entries are files. *)
+Array.iteri
+  (fun idx file -> if idx > 0 then dump_file file)
+  Sys.argv;
+List.iter
+  print_endline
+  [ "\\end{multicols}";
+    "\\end{document}" ]
+;;
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/Makefile b/helm/DEVEL/pxp/pxp/rtests/negative/Makefile
new file mode 100644
index 000000000..2e8842c16
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/Makefile
@@ -0,0 +1,28 @@
+# make test_negative: make bytecode executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+# Search path given to ocamlfind for resolving the packages named after
+# -package. Can be overridden on the make command line.
+OCAMLPATH=../..
+
+# A make variable reaches the commands of a rule only if it is exported or
+# already present in the environment. OCAMLPATH is therefore passed to
+# ocamlfind explicitly, in the same way as the test_codewriter script does.
+test_negative: test_negative.ml
+	OCAMLPATH=$(OCAMLPATH) ocamlfind ocamlc -custom -o test_negative -package .,str -linkpkg test_negative.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+# Removes compiler output and the test output file current.out.
+.PHONY: clean
+clean:
+	rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa current.out
+
+# There are no subdirectories to descend into, so CLEAN is the same as clean.
+.PHONY: CLEAN
+CLEAN: clean
+
+# Additionally removes editor backup files and the test executable.
+.PHONY: distclean
+distclean: clean
+	rm -f *~
+	rm -f test_negative
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/README b/helm/DEVEL/pxp/pxp/rtests/negative/README
new file mode 100644
index 000000000..cfdaf38a3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/README
@@ -0,0 +1,25 @@
+----------------------------------------------------------------------
+Regression test "negative":
+----------------------------------------------------------------------
+
+- An erroneous XML file is parsed, and the error message is printed.
+
+- The output is compared with a reference file. The test is only
+ passed if the output and the reference are equal.
+
+- Test data "data_jclark_notwf":
+ Contains the samples by James Clark that are not well-formed.
+ The subdirectories:
+ - sa: standalone documents
+ - not-sa: non-standalone document (with external DTD)
+ - ext-sa: non-standalone document (with other external entity)
+
+- Test data "data_jclark_invalid":
+ Contains the samples by James Clark that are invalid.
+
+- Tests that are not passed have been moved into the *-problems directories.
+ The reason is typically that they use characters outside the
+ Latin 1 character set.
+
+- Test data "data_notwf":
+ Contains own tests with samples that are not well-formed.
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.out
new file mode 100644
index 000000000..2d51ff6e2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/001.xml", at line 5, position 3:
+ERROR (Validity constraint): The root element is `b' but is declared as `a
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.xml
new file mode 100644
index 000000000..9a346e789
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.xml
@@ -0,0 +1,5 @@
+
+
+]>
+x
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.out
new file mode 100644
index 000000000..8a2f8d6a5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/010.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `id' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.xml
new file mode 100644
index 000000000..16e0e237f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.out
new file mode 100644
index 000000000..a03a2046b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/011.xml", at line 10, position 17:
+ERROR (Validity constraint): ID not unique
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.xml
new file mode 100644
index 000000000..6aa309120
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.xml
@@ -0,0 +1,11 @@
+
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.out
new file mode 100644
index 000000000..187096309
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.out
@@ -0,0 +1,3 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/012.xml", at line 6, position 1:
+ERROR (Validity constraint): More than one ID attribute for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.xml
new file mode 100644
index 000000000..d5bd7faf5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.out
new file mode 100644
index 000000000..98092c369
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/013.xml", at line 5, position 1:
+ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.xml
new file mode 100644
index 000000000..7d321a433
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.out
new file mode 100644
index 000000000..ec1247ba5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/014.xml", at line 5, position 1:
+ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.xml
new file mode 100644
index 000000000..a56220d1d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.out
new file mode 100644
index 000000000..18dd9c93d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/015.xml", at line 7, position 17:
+ERROR (Validity constraint): Attribute `idref' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.xml
new file mode 100644
index 000000000..d707f890c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.out
new file mode 100644
index 000000000..419228677
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/016.xml", at line 7, position 22:
+ERROR (Validity constraint): Attribute `idrefs' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.xml
new file mode 100644
index 000000000..7ea94555a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.out
new file mode 100644
index 000000000..97115e6ed
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/017.xml" at line 12, position 2:
+ERROR (Validity constraint): Attribute `idref' of element `el' refers to unknown ID `a20'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.xml
new file mode 100644
index 000000000..da6e58210
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.xml
@@ -0,0 +1,13 @@
+
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.out
new file mode 100644
index 000000000..8d761c95f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/018.xml" at line 12, position 2:
+ERROR (Validity constraint): Attribute `idrefs' of element `el' refers to unknown ID `a20'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.xml
new file mode 100644
index 000000000..c3f011b1a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.xml
@@ -0,0 +1,13 @@
+
+
+
+
+]>
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.out
new file mode 100644
index 000000000..661724568
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/019.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.xml
new file mode 100644
index 000000000..e10ed94ec
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.out
new file mode 100644
index 000000000..a95a6a738
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/020.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.xml
new file mode 100644
index 000000000..4f696dd40
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.out
new file mode 100644
index 000000000..bb950c74a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/021.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.xml
new file mode 100644
index 000000000..64662ac80
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.out
new file mode 100644
index 000000000..2b8e09185
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/022.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.xml
new file mode 100644
index 000000000..3e8fe89dc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.out
new file mode 100644
index 000000000..4775ce1a5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/023.xml", at line 6, position 13:
+ERROR (Validity constraint): Attribute `nm' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.xml
new file mode 100644
index 000000000..acf09e175
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.out
new file mode 100644
index 000000000..ad436a128
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/024.xml", at line 6, position 17:
+ERROR (Validity constraint): Attribute `nms' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.xml
new file mode 100644
index 000000000..cdeabbc90
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.out
new file mode 100644
index 000000000..f832e5d37
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/025.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `idref' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.xml
new file mode 100644
index 000000000..6b5531c3f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.out
new file mode 100644
index 000000000..2e311ed19
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/026.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `idrefs' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.xml
new file mode 100644
index 000000000..75757c46d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.out
new file mode 100644
index 000000000..f753d46d4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/027.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.xml
new file mode 100644
index 000000000..9c1fffbdb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.out
new file mode 100644
index 000000000..b8c4c8337
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/028.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.xml
new file mode 100644
index 000000000..2165803b2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.xml
@@ -0,0 +1,8 @@
+
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.out
new file mode 100644
index 000000000..3e71466f4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/029.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `nm' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.xml
new file mode 100644
index 000000000..a9cc68fe7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.out
new file mode 100644
index 000000000..1c25ca8ce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/030.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `nms' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.xml
new file mode 100644
index 000000000..da8a26c07
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.out
new file mode 100644
index 000000000..07ff0ff4f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/031.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `jpeg'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.xml
new file mode 100644
index 000000000..16d3b3bb1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.out
new file mode 100644
index 000000000..5a5b4b6e5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/032.xml", at line 6, position 1:
+ERROR (Validity constraint): Illegal default value for attribute `n' in declaration for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.xml
new file mode 100644
index 000000000..f4e2d4d17
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.out
new file mode 100644
index 000000000..de96bda6a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/033.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `n' does not match one of the declared notation names
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.xml
new file mode 100644
index 000000000..0ca58da48
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.out
new file mode 100644
index 000000000..e1cca97a1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/034.xml", at line 8, position 1:
+ERROR (Validity constraint): More than one NOTATION attribute for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.xml
new file mode 100644
index 000000000..10ee38f0a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.xml
@@ -0,0 +1,9 @@
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.out
new file mode 100644
index 000000000..9c3ea4513
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/035.xml", at line 5, position 1:
+ERROR (Validity constraint): Illegal default value for attribute `enum' in declaration for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.xml
new file mode 100644
index 000000000..a42060c87
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.out
new file mode 100644
index 000000000..39e3f7793
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/036.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `enum' does not match one of the declared enumerator tokens
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.xml
new file mode 100644
index 000000000..12cf01d3d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.out
new file mode 100644
index 000000000..afa614c30
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/037.xml", at line 7, position 5:
+ERROR (Validity constraint): Required attribute `x' is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.xml
new file mode 100644
index 000000000..2619e6010
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.out
new file mode 100644
index 000000000..902d7cd78
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/038.xml", at line 7, position 13:
+ERROR (Validity constraint): Attribute `x' is fixed, but has here a different value
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.xml
new file mode 100644
index 000000000..2ad30db88
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.ent
new file mode 100644
index 000000000..8c23f3e1f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.out
new file mode 100644
index 000000000..29296ae7c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.out
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/060.xml", at line 17, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.xml
new file mode 100644
index 000000000..1cc0f2d37
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+'>
+%declare_v2;
+
+%declare_v3;
+]>
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.ent
new file mode 100644
index 000000000..03674e93d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.ent
@@ -0,0 +1,2 @@
+'>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.out
new file mode 100644
index 000000000..40d1d076f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.out
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/061.xml", at line 18, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.xml
new file mode 100644
index 000000000..226df7889
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+'>
+%declare_v2;
+
+%declare_declare_v3;
+%declare_v3;
+]>
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.ent
new file mode 100644
index 000000000..8c23f3e1f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.out
new file mode 100644
index 000000000..b0266135e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.out
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/062.xml", at line 15, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.xml
new file mode 100644
index 000000000..458ac28cd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.xml
@@ -0,0 +1,17 @@
+
+
+
+
+
+'>
+%declare_v2;
+]>
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.ent
new file mode 100644
index 000000000..2d72317e0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.out
new file mode 100644
index 000000000..275ca29af
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/063.xml", at line 15, position 2:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.xml
new file mode 100644
index 000000000..18b7061bf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.xml
@@ -0,0 +1,17 @@
+
+
+
+
+'>
+%declare_e2;
+
+%declare_e3;
+]>
+
+
+ &e1;
+ &e2;
+ &e3;
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.ent
new file mode 100644
index 000000000..2d72317e0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.out
new file mode 100644
index 000000000..d825206ea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/064.xml", at line 17, position 10:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.xml
new file mode 100644
index 000000000..4fb66cdc2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.xml
@@ -0,0 +1,19 @@
+
+
+
+
+'>
+%declare_e2;
+
+%declare_e3;
+
+
+]>
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.ent
new file mode 100644
index 000000000..2d72317e0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.out
new file mode 100644
index 000000000..e2591c632
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/065.xml", at line 13, position 24:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.xml
new file mode 100644
index 000000000..3a52645a2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.xml
@@ -0,0 +1,23 @@
+
+
+
+
+'>
+%declare_e2;
+
+%declare_e3;
+
+
+]>
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.ent
new file mode 100644
index 000000000..136d73e65
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.out
new file mode 100644
index 000000000..d14209f38
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/066.xml", at line 13, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `n3'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.xml
new file mode 100644
index 000000000..a1cd7a6a9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.xml
@@ -0,0 +1,20 @@
+
+
+
+
+'>
+%declare_e2;
+
+%declare_e3;
+
+
+]>
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.ent
new file mode 100644
index 000000000..136d73e65
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.out
new file mode 100644
index 000000000..117eda985
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/067.xml", at line 15, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `n3'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.xml
new file mode 100644
index 000000000..bb4edd353
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.xml
@@ -0,0 +1,23 @@
+
+
+
+
+'>
+%declare_e2;
+
+%declare_e3;
+
+
+]>
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.ent
new file mode 100644
index 000000000..5a19ad888
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.out
new file mode 100644
index 000000000..394255683
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.out
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/068.xml", at line 19, position 23:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.xml
new file mode 100644
index 000000000..31c29f816
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+'>
+%declare_v2;
+
+%declare_v3;
+]>
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.ent
new file mode 100644
index 000000000..4bad1990e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.out
new file mode 100644
index 000000000..ca71500fc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.out
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/069.xml", at line 19, position 27:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.xml
new file mode 100644
index 000000000..a0f1b7059
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+'>
+%declare_v2;
+
+%declare_v3;
+]>
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.ent
new file mode 100644
index 000000000..1792bb905
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.out
new file mode 100644
index 000000000..4bbfb52d6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/070.xml", at line 19, position 32:
+ERROR (Validity constraint): Element `outer3' violates standalone declaration because extra white space separates the sub elements
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.xml
new file mode 100644
index 000000000..85e400abd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+'>
+%declare_outer2;
+
+%declare_outer3;
+]>
+
+
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.out
new file mode 100644
index 000000000..174afa06d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/080.xml", at line 4, position 0:
+ERROR (Validity constraint): The content model of element `b' is not deterministic
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.xml
new file mode 100644
index 000000000..8deeef449
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.out
new file mode 100644
index 000000000..1f3cfaa9f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/081.xml", at line 4, position 0:
+ERROR (Validity constraint): The content model of element `b' is not deterministic
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.xml
new file mode 100644
index 000000000..293eeae53
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/INDEX b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/INDEX
new file mode 100644
index 000000000..e2090fd67
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/INDEX
@@ -0,0 +1,75 @@
+----------------------------------------
+Root element
+----------------------------------------
+
+001.xml Declared root element type matches actual root element type
+
+----------------------------------------
+Attributes
+----------------------------------------
+
+010.xml ID attributes must match the Name production (not nmtoken)
+011.xml ID attributes uniquely identify the element bearing them
+ *** TODO ***
+012.xml No element type must have several ID attributes declared
+013.xml No ID attribute must have a default
+014.xml No ID attribute must have a default (FIXED)
+015.xml Attributes of type IDREF must match the Name production
+016.xml Attributes of type IDREFS must match the Names production
+017.xml Attributes of type IDREF must match the value of an ID
+ attribute
+ *** TODO ***
+018.xml Attributes of type IDREFS must match the values of ID
+ attributes
+ *** TODO ***
+019.xml Attributes of type ENTITY must match the Name production
+020.xml Attributes of type ENTITIES must match the Names production
+021.xml Attributes of type ENTITY must match an unparsed entity
+022.xml Attributes of type ENTITIES must match unparsed entities
+023.xml Attributes of type NMTOKEN must match the nmtoken production
+024.xml Attributes of type NMTOKENS must match the nmtokens production
+025.xml like 015.xml, but the default value is tested
+026.xml like 016.xml, but the default value is tested
+027.xml like 019.xml, but the default value is tested
+028.xml like 020.xml, but the default value is tested
+029.xml like 023.xml, but the default value is tested
+030.xml like 024.xml, but the default value is tested
+031.xml all notation names in the declaration must have been declared
+032.xml Values of NOTATION type must match one declared value
+033.xml Values of NOTATION type must match one declared value
+034.xml Only one NOTATION attribute per element
+035.xml Values of enum type must match one of the declared values
+036.xml Values of enum type must match one of the declared values
+037.xml missing #REQUIRED attribute
+038.xml #FIXED attributes must match the declared default
+
+----------------------------------------
+Standalone declaration
+----------------------------------------
+
+060.xml Externally declared default values are rejected
+061.xml variant of 060.xml (internal entity within external entity)
+062.xml variant of 060.xml (external subset of DTD)
+063.xml Externally declared parsed general entities are rejected
+ (entity ref occurs in main text)
+064.xml Externally declared parsed general entities are rejected
+ (entity ref occurs in attribute value)
+065.xml Externally declared parsed general entities are rejected
+ (entity ref occurs in attribute default)
+ *** THINK ABOUT THIS CASE AGAIN ***
+066.xml Externally declared unparsed entities are rejected
+ (entity ref occurs in attribute value)
+067.xml Externally declared unparsed entities are rejected
+ (entity ref occurs in attribute default)
+068.xml Externally declared NMTOKEN attributes require normal form
+069.xml Externally declared NMTOKENS attributes require normal form
+070.xml Externally declared elements with regexp content model
+ do not like extra white space
+
+----------------------------------------
+Deterministic models
+----------------------------------------
+
+080.xml One example
+081.xml Another example
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.ent
new file mode 100644
index 000000000..fea9c4e10
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.ent
@@ -0,0 +1,3 @@
+
+
+%e; -->
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.out
new file mode 100644
index 000000000..074f8eab8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/001.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "001.ent", at line 3, position 3:
+ERROR (Well-formedness constraint): `-->' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.xml
new file mode 100644
index 000000000..02ef0bb08
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.ent
new file mode 100644
index 000000000..35b4309a3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.out
new file mode 100644
index 000000000..2883a3238
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/002.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "002.ent", at line 2, position 18:
+ERROR (Validity constraint): Entities not properly nested with parentheses
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.xml
new file mode 100644
index 000000000..0c5372cbb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.ent
new file mode 100644
index 000000000..3610f125a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.ent
@@ -0,0 +1,2 @@
+
+%e; doc (#PCDATA)>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.out
new file mode 100644
index 000000000..9125d01a6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/003.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "003.ent", at line 2, position 17:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.xml
new file mode 100644
index 000000000..c4b33e49a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.ent
new file mode 100644
index 000000000..312726319
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.ent
@@ -0,0 +1,3 @@
+
+">
+%e1; doc (#PCDATA) %e2;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.out
new file mode 100644
index 000000000..f7b93fcba
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.out
@@ -0,0 +1,4 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/004.xml", at line 1, position 30:
+In entity e2, at line 1, position 1:
+Called from entity [dtd] = SYSTEM "004.ent", line 3, position 19:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.xml
new file mode 100644
index 000000000..740d17301
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.ent
new file mode 100644
index 000000000..3326c04f5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.ent
@@ -0,0 +1,2 @@
+">
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.ent
new file mode 100644
index 000000000..4bd731463
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.ent
@@ -0,0 +1,2 @@
+">
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.ent
new file mode 100644
index 000000000..378a2074b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.ent
@@ -0,0 +1 @@
+&e;
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.out
new file mode 100644
index 000000000..57edec389
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/001.xml", at line 3, position 1:
+ERROR (Validity constraint): The root element is not declared
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.xml
new file mode 100644
index 000000000..673dc58e2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.xml
@@ -0,0 +1,4 @@
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.ent
new file mode 100644
index 000000000..c7bec6345
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.ent
@@ -0,0 +1,3 @@
+
+data
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.out
new file mode 100644
index 000000000..65e741d0b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.out
@@ -0,0 +1,3 @@
+In entity e = SYSTEM "002.ent", at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/002.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.xml
new file mode 100644
index 000000000..2ee598889
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.ent
new file mode 100644
index 000000000..a0d0d0449
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.ent
@@ -0,0 +1,2 @@
+
+data
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.out
new file mode 100644
index 000000000..1d090c7a4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.out
@@ -0,0 +1,3 @@
+In entity e = SYSTEM "003.ent", at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/003.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.xml
new file mode 100644
index 000000000..407a4a1ee
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.ent
new file mode 100644
index 000000000..b0292fcb3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.ent
@@ -0,0 +1,3 @@
+
+]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.out
new file mode 100644
index 000000000..577cce20e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/001.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "001.ent", at line 3, position 0:
+ERROR (Well-formedness constraint): `>]>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.xml
new file mode 100644
index 000000000..02ef0bb08
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.out
new file mode 100644
index 000000000..8cb67833e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 1:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/002.xml", line 4, position 0:
+ERROR (Well-formedness constraint): `]' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.xml
new file mode 100644
index 000000000..a0a538df8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.xml
@@ -0,0 +1,6 @@
+
+">
+%e;
+]>
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.ent
new file mode 100644
index 000000000..5b49337a2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.ent
@@ -0,0 +1,2 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.ent
new file mode 100644
index 000000000..fcce6e0a6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.ent
@@ -0,0 +1,2 @@
+
+]>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.xml
new file mode 100644
index 000000000..740d17301
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.ent
new file mode 100644
index 000000000..aae85596b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.ent
@@ -0,0 +1,2 @@
+
+%e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.out
new file mode 100644
index 000000000..da38b032e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/005.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "005.ent", at line 2, position 0:
+ERROR (Well-formedness constraint): Reference to undeclared parameter entity `e'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.xml
new file mode 100644
index 000000000..aa3a8f906
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.ent
new file mode 100644
index 000000000..b60f41ce4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.ent
@@ -0,0 +1,3 @@
+
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.out
new file mode 100644
index 000000000..edc29eace
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/006.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "006.ent", at line 2, position 0:
+ERROR (Well-formedness constraint): Bad conditional section
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.xml
new file mode 100644
index 000000000..bd2ee3261
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.ent
new file mode 100644
index 000000000..62a92ed49
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.ent
@@ -0,0 +1,3 @@
+
+]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.out
new file mode 100644
index 000000000..e8b088d74
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/007.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "007.ent", at line 1, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.xml
new file mode 100644
index 000000000..1c5bc80ef
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.ent
new file mode 100644
index 000000000..11172a929
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.ent
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.out
new file mode 100644
index 000000000..7370c42e1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.out
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/008.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "008.ent", at line 2, position 17:
+ERROR (Well-formedness constraint): The character '%' must be written as '&#37;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.xml
new file mode 100644
index 000000000..c140c0a2a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/140.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/140.xml
new file mode 100644
index 000000000..062b2135e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/140.xml
@@ -0,0 +1,4 @@
+゚>">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/141.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/141.xml
new file mode 100644
index 000000000..6d864a309
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/141.xml
@@ -0,0 +1,4 @@
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.out
new file mode 100644
index 000000000..bff9b8250
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/001.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.xml
new file mode 100644
index 000000000..253fea691
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.xml
@@ -0,0 +1,5 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.out
new file mode 100644
index 000000000..c6d2a49d8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/002.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.xml
new file mode 100644
index 000000000..6ca801a4e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.xml
@@ -0,0 +1,4 @@
+
+<.doc>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.out
new file mode 100644
index 000000000..65fd8efb4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/003.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.xml
new file mode 100644
index 000000000..07a534da6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.xml
@@ -0,0 +1 @@
+ ?>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.out
new file mode 100644
index 000000000..18d0d66e8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/004.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.xml
new file mode 100644
index 000000000..f89e662f6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.out
new file mode 100644
index 000000000..f5ece686e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/005.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.xml
new file mode 100644
index 000000000..16127dc7a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.out
new file mode 100644
index 000000000..06dd72899
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/006.xml", at line 1, position 20:
+ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.xml
new file mode 100644
index 000000000..789d0b808
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.out
new file mode 100644
index 000000000..dccf06490
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/007.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.xml
new file mode 100644
index 000000000..2b017e324
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.xml
@@ -0,0 +1 @@
+& no refc
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.out
new file mode 100644
index 000000000..fc1624140
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/008.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.xml
new file mode 100644
index 000000000..3117de42f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.xml
@@ -0,0 +1 @@
+&.entity;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.out
new file mode 100644
index 000000000..74802e50a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/009.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.xml
new file mode 100644
index 000000000..62f923913
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.xml
@@ -0,0 +1 @@
+RE;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.out
new file mode 100644
index 000000000..c7c12d0f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/010.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.xml
new file mode 100644
index 000000000..44a48f9fa
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.xml
@@ -0,0 +1 @@
+A & B
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.out
new file mode 100644
index 000000000..af9e50ef1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/011.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): Bad attribute list
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.xml
new file mode 100644
index 000000000..ae4175876
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.out
new file mode 100644
index 000000000..5447f51ce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/012.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Bad attribute list
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.xml
new file mode 100644
index 000000000..d4ffd747e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.out
new file mode 100644
index 000000000..701c4aadd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/013.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.xml
new file mode 100644
index 000000000..c436a25c4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.out
new file mode 100644
index 000000000..a7826eca7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/015.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Bad attribute list
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.xml
new file mode 100644
index 000000000..6f6f32f52
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.out
new file mode 100644
index 000000000..c7e489f6e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/016.xml", at line 1, position 13:
+ERROR (Well-formedness constraint): `>' or `/>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.xml
new file mode 100644
index 000000000..6a69ddb6c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.out
new file mode 100644
index 000000000..9ad92fb30
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/017.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.xml
new file mode 100644
index 000000000..9474c49ea
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.out
new file mode 100644
index 000000000..9fd2963cc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/018.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.xml
new file mode 100644
index 000000000..cdbf8b359
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.out
new file mode 100644
index 000000000..5e1846c53
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/019.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.xml
new file mode 100644
index 000000000..7205628bb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.xml
@@ -0,0 +1 @@
+>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.out
new file mode 100644
index 000000000..8e64d3798
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/020.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.xml
new file mode 100644
index 000000000..d672b1cf6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.out
new file mode 100644
index 000000000..d6e979cce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/021.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.xml
new file mode 100644
index 000000000..c5d1e399f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.out
new file mode 100644
index 000000000..f16b0e428
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/022.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.xml
new file mode 100644
index 000000000..c47a4d679
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.out
new file mode 100644
index 000000000..dca9b948c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/023.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.xml
new file mode 100644
index 000000000..9d1ecd82b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.out
new file mode 100644
index 000000000..a6cec019a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/024.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.xml
new file mode 100644
index 000000000..3e5672770
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.xml
@@ -0,0 +1,3 @@
+
+<123>123>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.out
new file mode 100644
index 000000000..8992cdfc3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/025.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.xml
new file mode 100644
index 000000000..9ad426642
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.xml
@@ -0,0 +1 @@
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.out
new file mode 100644
index 000000000..6f3ff0588
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/026.xml", at line 1, position 6:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.xml
new file mode 100644
index 000000000..5543609ca
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.xml
@@ -0,0 +1 @@
+]]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.out
new file mode 100644
index 000000000..5274a84e4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/027.xml", at line 4, position 0:
+ERROR (Well-formedness constraint): `-->' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.xml
new file mode 100644
index 000000000..0ae9fa68e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.xml
@@ -0,0 +1,3 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.out
new file mode 100644
index 000000000..61b600198
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/033.xml", at line 1, position 8:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.xml
new file mode 100644
index 000000000..ea1693c86
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.xml
@@ -0,0 +1 @@
+abcdef
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.out
new file mode 100644
index 000000000..48ee6008f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/034.xml", at line 1, position 4:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.xml
new file mode 100644
index 000000000..84841f4c0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.xml
@@ -0,0 +1 @@
+A form-feed is not white space or a name character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.out
new file mode 100644
index 000000000..9798306b0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/035.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.xml
new file mode 100644
index 000000000..7032f9ac7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.xml
@@ -0,0 +1 @@
+1 < 2 but not in XML
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.out
new file mode 100644
index 000000000..63baee38d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/036.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Data not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.xml
new file mode 100644
index 000000000..b3259d0e9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.xml
@@ -0,0 +1,2 @@
+
+Illegal data
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.out
new file mode 100644
index 000000000..bdeb907a5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/037.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Character reference not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.xml
new file mode 100644
index 000000000..356448a46
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.out
new file mode 100644
index 000000000..9c9acf3e0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/038.xml", at line 1, position 29:
+ERROR (Well-formedness constraint): Attribute `x' occurs twice in element `doc'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.xml
new file mode 100644
index 000000000..0590e53c6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.out
new file mode 100644
index 000000000..3227cb0c6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/039.xml", at line 1, position 12:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.xml
new file mode 100644
index 000000000..971effa35
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.out
new file mode 100644
index 000000000..e58e599d4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/040.xml", at line 2, position 5:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.xml
new file mode 100644
index 000000000..7591d8bf8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.out
new file mode 100644
index 000000000..c90cfadf4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/041.xml", at line 2, position 5:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.xml
new file mode 100644
index 000000000..405efd3d9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.out
new file mode 100644
index 000000000..ea78b5e20
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/042.xml", at line 1, position 11:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.xml
new file mode 100644
index 000000000..1247cc8e8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.out
new file mode 100644
index 000000000..24860aab8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/043.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Data not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.xml
new file mode 100644
index 000000000..5cc527ca5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.xml
@@ -0,0 +1,2 @@
+
+Illegal data
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.out
new file mode 100644
index 000000000..573b7c431
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/044.xml", at line 1, position 12:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.xml
new file mode 100644
index 000000000..b71d06244
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.out
new file mode 100644
index 000000000..78a1ed330
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/045.xml", at line 2, position 2:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.xml
new file mode 100644
index 000000000..f7540e8d1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.xml
@@ -0,0 +1,4 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.out
new file mode 100644
index 000000000..790f16795
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/046.xml", at line 2, position 2:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.xml
new file mode 100644
index 000000000..d5d901e24
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.out
new file mode 100644
index 000000000..6b2fd78f9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/047.xml", at line 2, position 3:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.xml
new file mode 100644
index 000000000..00ae5234c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.out
new file mode 100644
index 000000000..4d54039e6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/048.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): CDATA section not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.xml
new file mode 100644
index 000000000..9092ffa2a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.out
new file mode 100644
index 000000000..260dee4f1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/049.xml", at line 3, position 15:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.xml
new file mode 100644
index 000000000..e5a6b7087
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.xml
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.out
new file mode 100644
index 000000000..b813938da
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/050.xml", at line 1, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.xml
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.out
new file mode 100644
index 000000000..15e4f65b7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/051.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.xml
new file mode 100644
index 000000000..19f13dec1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.out
new file mode 100644
index 000000000..331a4c393
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/052.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.xml
new file mode 100644
index 000000000..b3fe37672
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.out
new file mode 100644
index 000000000..88a0dda4f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/053.xml", at line 1, position 10:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.xml
new file mode 100644
index 000000000..bf5c6d506
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.out
new file mode 100644
index 000000000..e20db324d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/054.xml", at line 2, position 36:
+ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.xml
new file mode 100644
index 000000000..f4d24e26c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.out
new file mode 100644
index 000000000..9d9f52d10
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/055.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.xml
new file mode 100644
index 000000000..ae922eec2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.xml
@@ -0,0 +1,2 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.out
new file mode 100644
index 000000000..ba062b048
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/056.xml", at line 1, position 14:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.xml
new file mode 100644
index 000000000..b4a32cbe2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.out
new file mode 100644
index 000000000..66ab6fa68
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/057.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.xml
new file mode 100644
index 000000000..af3b2643e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.out
new file mode 100644
index 000000000..4a42cd203
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/058.xml", at line 3, position 21:
+ERROR (Well-formedness constraint): `|' and more names expected, or `)'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.xml
new file mode 100644
index 000000000..6b525cfbc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.out
new file mode 100644
index 000000000..a31204f07
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/059.xml", at line 3, position 25:
+ERROR (Well-formedness constraint): #REQUIRED, #IMPLIED, #FIXED or a string literal expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.xml
new file mode 100644
index 000000000..4a5c9565f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.out
new file mode 100644
index 000000000..295f54347
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/060.xml", at line 3, position 21:
+ERROR (Well-formedness constraint): One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.xml
new file mode 100644
index 000000000..3ddde346b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.out
new file mode 100644
index 000000000..1545e2702
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/061.xml", at line 2, position 28:
+ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.xml
new file mode 100644
index 000000000..d58093d4d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.out
new file mode 100644
index 000000000..581d3bbbe
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/062.xml", at line 2, position 12:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.xml
new file mode 100644
index 000000000..4f091e45b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.out
new file mode 100644
index 000000000..128b19150
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/063.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.xml
new file mode 100644
index 000000000..f9bd03cb0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.out
new file mode 100644
index 000000000..ad7f21cfd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/064.xml", at line 3, position 20:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.xml
new file mode 100644
index 000000000..f8d5894c1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.out
new file mode 100644
index 000000000..4675f7539
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/065.xml", at line 3, position 16:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.xml
new file mode 100644
index 000000000..29dc6e519
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.out
new file mode 100644
index 000000000..0812fc43f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/066.xml", at line 3, position 26:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.xml
new file mode 100644
index 000000000..04dbdb749
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.out
new file mode 100644
index 000000000..4ed711039
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/067.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.xml
new file mode 100644
index 000000000..de125795a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.out
new file mode 100644
index 000000000..186ea4258
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/068.xml", at line 3, position 25:
+ERROR (Well-formedness constraint): Error in NOTATION type (perhaps missing whitespace after NOTATION?)
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.xml
new file mode 100644
index 000000000..9f4a0093e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.out
new file mode 100644
index 000000000..d508784ac
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/069.xml", at line 4, position 38:
+ERROR (Well-formedness constraint): Whitespace missing before `NDATA'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.xml
new file mode 100644
index 000000000..a3ac7ea14
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.out
new file mode 100644
index 000000000..d0813ed93
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/070.xml", at line 1, position 40:
+ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.xml
new file mode 100644
index 000000000..a3ec12fab
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.out
new file mode 100644
index 000000000..01100f9ed
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.out
@@ -0,0 +1,5 @@
+In entity e3, at line 1, position 0:
+Called from entity e2, line 1, position 0:
+Called from entity e1, line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/071.xml", line 6, position 5:
+ERROR (Validity constraint): Recursive reference to entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.xml
new file mode 100644
index 000000000..8fe3ef783
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&e1;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.out
new file mode 100644
index 000000000..5534c5ca6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/072.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.xml
new file mode 100644
index 000000000..65fd17cb6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.xml
@@ -0,0 +1 @@
+&foo;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.out
new file mode 100644
index 000000000..855179a9b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/073.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `f'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.xml
new file mode 100644
index 000000000..cd61644e9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.xml
@@ -0,0 +1,4 @@
+
+]>
+&f;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.out
new file mode 100644
index 000000000..5cf73013c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 5:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/074.xml", line 5, position 5:
+ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.xml
new file mode 100644
index 000000000..dca3f1183
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.xml
@@ -0,0 +1,6 @@
+">
+]>
+
+&e;
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.out
new file mode 100644
index 000000000..ed842c96f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/075.xml", at line 6, position 7:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.xml
new file mode 100644
index 000000000..9784de3d1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.xml
@@ -0,0 +1,7 @@
+
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.out
new file mode 100644
index 000000000..63a0d3891
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/076.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.xml
new file mode 100644
index 000000000..40641942b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.out
new file mode 100644
index 000000000..6c854b726
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/077.xml", at line 4, position 7:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `bar'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.xml
new file mode 100644
index 000000000..36f3391ac
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.out
new file mode 100644
index 000000000..fffb5533d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/078.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.xml
new file mode 100644
index 000000000..e3af2f28c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.out
new file mode 100644
index 000000000..5469d5965
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/079.xml", at line 6, position 22:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.xml
new file mode 100644
index 000000000..c778a8066
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.out
new file mode 100644
index 000000000..aadb4b7f3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/080.xml", at line 6, position 29:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.xml
new file mode 100644
index 000000000..4949e99f2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.out
new file mode 100644
index 000000000..cdaf25721
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/081.xml", at line 4, position 7:
+Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.xml
new file mode 100644
index 000000000..3be7e41a8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.out
new file mode 100644
index 000000000..72a6b3ae2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/082.xml", at line 4, position 22:
+Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.xml
new file mode 100644
index 000000000..f99640f84
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.out
new file mode 100644
index 000000000..0744b43e2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/083.xml", at line 4, position 5:
+ERROR (Validity constraint): Invalid reference to NDATA entity e
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.xml
new file mode 100644
index 000000000..8d6ff9679
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.xml
@@ -0,0 +1,4 @@
+
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.out
new file mode 100644
index 000000000..3a09c3405
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/084.xml", at line 4, position 22:
+ERROR (Validity constraint): Invalid reference to NDATA entity e
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.xml
new file mode 100644
index 000000000..5b10fb972
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.out
new file mode 100644
index 000000000..87ff19db2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/085.xml", at line 1, position 25:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.xml
new file mode 100644
index 000000000..1d643645d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.out
new file mode 100644
index 000000000..63bb4a10b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/086.xml", at line 2, position 24:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.xml
new file mode 100644
index 000000000..454893a3b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.out
new file mode 100644
index 000000000..4dc1495d4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/087.xml", at line 2, position 36:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.xml
new file mode 100644
index 000000000..01e876005
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.out
new file mode 100644
index 000000000..574049d61
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/088.xml", at line 6, position 7:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.xml
new file mode 100644
index 000000000..a581de88b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.xml
new file mode 100644
index 000000000..a788aabfa
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.out
new file mode 100644
index 000000000..82d875852
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 7:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/090.xml", line 4, position 5:
+ERROR (Well-formedness constraint): Attribute value contains character '<' literally
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.xml
new file mode 100644
index 000000000..f82c23857
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.xml
@@ -0,0 +1,4 @@
+ ">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.out
new file mode 100644
index 000000000..9b96ad6f4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/091.xml", at line 3, position 32:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.xml
new file mode 100644
index 000000000..9601dcb08
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.out
new file mode 100644
index 000000000..b32fb9a5f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 7:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/092.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.xml
new file mode 100644
index 000000000..a867ecd86
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.xml
@@ -0,0 +1,4 @@
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.out
new file mode 100644
index 000000000..eda559af4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/093.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.xml
new file mode 100644
index 000000000..a5f8638d8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.xml
@@ -0,0 +1 @@
+X
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.out
new file mode 100644
index 000000000..0422cf871
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/094.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.xml
new file mode 100644
index 000000000..483ed52c0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.out
new file mode 100644
index 000000000..9f392939d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/095.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.xml
new file mode 100644
index 000000000..ba2cbe8b2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.out
new file mode 100644
index 000000000..17a3a1bd7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/096.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.xml
new file mode 100644
index 000000000..f41eaba67
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.out
new file mode 100644
index 000000000..3e512b42b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/097.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.xml
new file mode 100644
index 000000000..3b9506553
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.out
new file mode 100644
index 000000000..0e7f2233c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/098.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.xml
new file mode 100644
index 000000000..9627acbce
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.out
new file mode 100644
index 000000000..e7471f8cc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/099.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.xml
new file mode 100644
index 000000000..02637f080
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.xml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.out
new file mode 100644
index 000000000..c4ec7380b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/100.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Illegal 'standalone' declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.xml
new file mode 100644
index 000000000..38beda80c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.out
new file mode 100644
index 000000000..8c31631e5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/101.xml", at line 1, position 0:
+Other exception: Failure("Netconversion.encoding_of_string: unknown encoding")
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.xml
new file mode 100644
index 000000000..6191a8067
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.out
new file mode 100644
index 000000000..895c6b8b3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/102.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML version string
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.xml
new file mode 100644
index 000000000..a4cde409b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.out
new file mode 100644
index 000000000..86dda33fc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/103.xml", at line 4, position 13:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.xml
new file mode 100644
index 000000000..fc5d152dd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.xml
@@ -0,0 +1,4 @@
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.out
new file mode 100644
index 000000000..b3c9bbbb7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/104.xml", at line 4, position 13:
+ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.xml
new file mode 100644
index 000000000..b35b90ea2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.xml
@@ -0,0 +1,4 @@
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.out
new file mode 100644
index 000000000..146e9802b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/105.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.xml
new file mode 100644
index 000000000..5f6055109
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.xml
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.out
new file mode 100644
index 000000000..cec450187
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/106.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.xml
new file mode 100644
index 000000000..87c56d7df
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.out
new file mode 100644
index 000000000..8f6919dc0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/107.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.xml
new file mode 100644
index 000000000..2a6990664
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.out
new file mode 100644
index 000000000..bbca44bc5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/108.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.xml
new file mode 100644
index 000000000..187b07f38
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.out
new file mode 100644
index 000000000..bf1f79ac4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/109.xml", at line 4, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.xml
new file mode 100644
index 000000000..33b1cf337
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.xml
@@ -0,0 +1,4 @@
+ ">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.out
new file mode 100644
index 000000000..4176538a3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/110.xml", at line 5, position 3:
+ERROR (Well-formedness constraint): Entity reference not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.xml
new file mode 100644
index 000000000..4d7bf99a5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.xml
@@ -0,0 +1,5 @@
+
+]>
+
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.out
new file mode 100644
index 000000000..be02f8818
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/111.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.xml
new file mode 100644
index 000000000..530c6ccae
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.out
new file mode 100644
index 000000000..5361f5658
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/112.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.xml
new file mode 100644
index 000000000..13cfcc5bc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.out
new file mode 100644
index 000000000..23c928404
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/113.xml", at line 2, position 18:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.xml
new file mode 100644
index 000000000..899102b51
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.out
new file mode 100644
index 000000000..06be1fda4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/114.xml", at line 2, position 16:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.xml
new file mode 100644
index 000000000..32d6d0767
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.out
new file mode 100644
index 000000000..54a92412b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/115.xml", at line 4, position 7:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.xml
new file mode 100644
index 000000000..af014a09d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.out
new file mode 100644
index 000000000..bebbe2c4c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/116.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.xml
new file mode 100644
index 000000000..ce37ca009
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.xml
@@ -0,0 +1,4 @@
+
+]>
+&e;7;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.out
new file mode 100644
index 000000000..2b613d7f4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/117.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.xml
new file mode 100644
index 000000000..5ba4eb0fe
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.xml
@@ -0,0 +1,4 @@
+
+]>
+&e;#97;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.out
new file mode 100644
index 000000000..fda0ee414
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/118.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.xml
new file mode 100644
index 000000000..49b4b8cbc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.xml
@@ -0,0 +1,4 @@
+
+]>
+&&e;97;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.out
new file mode 100644
index 000000000..0c3a84a09
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/119.xml", line 5, position 0:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.xml
new file mode 100644
index 000000000..7ee56bedd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.xml
@@ -0,0 +1,6 @@
+
+]>
+
+&e;#38;
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.out
new file mode 100644
index 000000000..4b1ff9cc5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/120.xml", line 5, position 0:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.xml
new file mode 100644
index 000000000..ae8f55aff
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.xml
@@ -0,0 +1,6 @@
+
+]>
+
+&e;
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.out
new file mode 100644
index 000000000..1daf3a86f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/121.xml", at line 2, position 9:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.xml
new file mode 100644
index 000000000..63ecbe4bc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.out
new file mode 100644
index 000000000..8d65e6943
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/122.xml", at line 2, position 27:
+ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.xml
new file mode 100644
index 000000000..e8a708244
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.out
new file mode 100644
index 000000000..50f7364c8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/123.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.xml
new file mode 100644
index 000000000..f2dc633c2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.out
new file mode 100644
index 000000000..9d1931cf1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/124.xml", at line 2, position 19:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.xml
new file mode 100644
index 000000000..1abde7bae
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.out
new file mode 100644
index 000000000..adb0e6e0f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/125.xml", at line 2, position 16:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.xml
new file mode 100644
index 000000000..15519d4cb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.out
new file mode 100644
index 000000000..8a93b34a3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/126.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.xml
new file mode 100644
index 000000000..b6cdb0c65
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.out
new file mode 100644
index 000000000..9deb08411
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/127.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.xml
new file mode 100644
index 000000000..557df35dd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.out
new file mode 100644
index 000000000..3fa460ad7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/128.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.xml
new file mode 100644
index 000000000..e8f854325
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.out
new file mode 100644
index 000000000..076251250
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/129.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): Content model expression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.xml
new file mode 100644
index 000000000..6471a8d2a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.out
new file mode 100644
index 000000000..6cd8d45be
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/130.xml", at line 2, position 21:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.xml
new file mode 100644
index 000000000..a4f0e867e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.out
new file mode 100644
index 000000000..4bc40b037
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/131.xml", at line 2, position 21:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.xml
new file mode 100644
index 000000000..783537f59
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.out
new file mode 100644
index 000000000..cf96a2b3f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/132.xml", at line 2, position 41:
+ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.xml
new file mode 100644
index 000000000..00823ff6a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.out
new file mode 100644
index 000000000..9910edede
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/133.xml", at line 2, position 17:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.xml
new file mode 100644
index 000000000..d7444ebe1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.out
new file mode 100644
index 000000000..ebb96aad8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/134.xml", at line 2, position 18:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.xml
new file mode 100644
index 000000000..78b1a5975
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.out
new file mode 100644
index 000000000..9a95cbd0d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/135.xml", at line 2, position 17:
+ERROR (Well-formedness constraint): References to general entities not allowed in DTDs
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.xml
new file mode 100644
index 000000000..6e2421e1f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.out
new file mode 100644
index 000000000..3af635a18
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/136.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.xml
new file mode 100644
index 000000000..a1a0b2e3c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.out
new file mode 100644
index 000000000..b5659d74f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/137.xml", at line 2, position 13:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.xml
new file mode 100644
index 000000000..de472bd0c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.out
new file mode 100644
index 000000000..405657aaa
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/138.xml", at line 2, position 19:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.xml
new file mode 100644
index 000000000..d81dd54cd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.out
new file mode 100644
index 000000000..a376e2b64
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/139.xml", at line 2, position 15:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.xml
new file mode 100644
index 000000000..2c6c92e93
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.out
new file mode 100644
index 000000000..abf07efab
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/140.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.xml
new file mode 100644
index 000000000..062b2135e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.xml
@@ -0,0 +1,4 @@
+゚>">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.out
new file mode 100644
index 000000000..cbd618570
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 2:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/141.xml", line 4, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.xml
new file mode 100644
index 000000000..6d864a309
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.xml
@@ -0,0 +1,4 @@
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.out
new file mode 100644
index 000000000..88943b23e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/142.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 0 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.xml
new file mode 100644
index 000000000..57517d27a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.out
new file mode 100644
index 000000000..d1b511b21
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/143.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 31 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.xml
new file mode 100644
index 000000000..52c25d7c6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.out
new file mode 100644
index 000000000..d67fe5ae4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/144.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 65535 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.xml
new file mode 100644
index 000000000..0f98e23b1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.out
new file mode 100644
index 000000000..4c79e182f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/145.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 55296 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.xml
new file mode 100644
index 000000000..4909796d2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.out
new file mode 100644
index 000000000..f90c91c7f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/146.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 1114112 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.xml
new file mode 100644
index 000000000..53e98757c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.out
new file mode 100644
index 000000000..41035feb0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/147.xml", at line 2, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.xml
new file mode 100644
index 000000000..93fa1eec7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.out
new file mode 100644
index 000000000..c42a3e91b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/148.xml", at line 2, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.xml
new file mode 100644
index 000000000..a1623d5c3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.out
new file mode 100644
index 000000000..71e283218
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/149.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): `]' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.xml
new file mode 100644
index 000000000..0632eb7ef
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.out
new file mode 100644
index 000000000..160ef3097
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/150.xml", at line 2, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.xml
new file mode 100644
index 000000000..e7c6e8c56
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.out
new file mode 100644
index 000000000..444e8d686
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/151.xml", at line 3, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.xml
new file mode 100644
index 000000000..fd9616cb7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.out
new file mode 100644
index 000000000..211ee8aa7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/152.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.xml
new file mode 100644
index 000000000..3245b2e26
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.out
new file mode 100644
index 000000000..923e99815
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/153.xml", line 5, position 5:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.xml
new file mode 100644
index 000000000..07bd8dfd9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.xml
@@ -0,0 +1,5 @@
+
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.out
new file mode 100644
index 000000000..c8d770213
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/154.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.xml
new file mode 100644
index 000000000..f58969c8c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.out
new file mode 100644
index 000000000..36fc52802
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/155.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.xml
new file mode 100644
index 000000000..87eccf0f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.out
new file mode 100644
index 000000000..88abf6a45
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/156.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.xml
new file mode 100644
index 000000000..98e2c4b23
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.out
new file mode 100644
index 000000000..793da4351
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/157.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.xml
new file mode 100644
index 000000000..363821a6b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.out
new file mode 100644
index 000000000..1bfd00d3a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/158.xml", at line 4, position 10:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.xml
new file mode 100644
index 000000000..ebbeb51cf
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.out
new file mode 100644
index 000000000..83264a9d6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/159.xml", at line 3, position 38:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.xml
new file mode 100644
index 000000000..3a017ef4e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.xml
@@ -0,0 +1,5 @@
+
+">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.out
new file mode 100644
index 000000000..1669390ae
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/160.xml", at line 4, position 18:
+ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.xml
new file mode 100644
index 000000000..7e33116c7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.out
new file mode 100644
index 000000000..0d78a8d9f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 9:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/161.xml", line 3, position 15:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.xml
new file mode 100644
index 000000000..e25699514
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.out
new file mode 100644
index 000000000..f06c26949
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/162.xml", at line 4, position 20:
+ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.xml
new file mode 100644
index 000000000..d1336da8d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.out
new file mode 100644
index 000000000..b7fb7b059
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/163.xml", at line 5, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.xml
new file mode 100644
index 000000000..bb35a7b87
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.xml
@@ -0,0 +1,6 @@
+
+
+]>
+%e;
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.out
new file mode 100644
index 000000000..6151b9572
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/164.xml", at line 4, position 2:
+ERROR (Well-formedness constraint): References to parameter entities not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.xml
new file mode 100644
index 000000000..31da4ff1b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.xml
@@ -0,0 +1,5 @@
+
+
+] %e; >
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.out
new file mode 100644
index 000000000..981b8a00f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/165.xml", at line 2, position 8:
+ERROR (Well-formedness constraint): Whitespace is missing after ENTITY
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.xml
new file mode 100644
index 000000000..9b5198e8e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.out
new file mode 100644
index 000000000..7ade465f8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/166.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.xml
new file mode 100644
index 000000000..60f66f80a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.xml
@@ -0,0 +1 @@
+ï¿¿
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.out
new file mode 100644
index 000000000..56d06ae2d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/167.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.xml
new file mode 100644
index 000000000..fc536a131
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.xml
@@ -0,0 +1 @@
+￾
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.out
new file mode 100644
index 000000000..41163ea53
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/168.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.xml
new file mode 100644
index 000000000..ce8b4d73b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.xml
@@ -0,0 +1 @@
+í
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.out
new file mode 100644
index 000000000..623ad46d9
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/169.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.xml
new file mode 100644
index 000000000..6c1a0bf6e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.xml
@@ -0,0 +1 @@
+í°
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.out
new file mode 100644
index 000000000..29011da24
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/170.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.xml
new file mode 100644
index 000000000..6d02d89df
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.xml
@@ -0,0 +1 @@
+÷
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.out
new file mode 100644
index 000000000..7ccaffa81
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/171.xml", at line 1, position 5:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.xml
new file mode 100644
index 000000000..7fa118b88
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.out
new file mode 100644
index 000000000..8b7d0abb7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/172.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.xml
new file mode 100644
index 000000000..434f799bc
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.out
new file mode 100644
index 000000000..25ef8fdf4
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/173.xml", at line 1, position 7:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.xml
new file mode 100644
index 000000000..ab5a447e8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.out
new file mode 100644
index 000000000..faec5b27d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/174.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.xml
new file mode 100644
index 000000000..b7f3db99b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.xml
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.out
new file mode 100644
index 000000000..0f0e72c0d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/175.xml", at line 3, position 18:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.xml
new file mode 100644
index 000000000..6d13a21a0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.out
new file mode 100644
index 000000000..b1940a16d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/176.xml", at line 5, position 0:
+ERROR (Well-formedness constraint): Missing end tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.xml
new file mode 100644
index 000000000..9c8e2e47d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.out
new file mode 100644
index 000000000..58d032c0d
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/177.xml", at line 4, position 6:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.xml
new file mode 100644
index 000000000..bde27a65f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.xml
@@ -0,0 +1,4 @@
+
+]>
+Aï¿¿
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.out
new file mode 100644
index 000000000..4f324407a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/178.xml", at line 5, position 7:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.xml
new file mode 100644
index 000000000..ba36a31d2
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.out
new file mode 100644
index 000000000..15cabefa5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/180.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.xml
new file mode 100644
index 000000000..d51b1907c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.out
new file mode 100644
index 000000000..b9c256957
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/181.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.xml
new file mode 100644
index 000000000..c438f1b91
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.out
new file mode 100644
index 000000000..e54bfb9fb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.out
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 4:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/182.xml", line 5, position 5:
+ERROR (Well-formedness constraint): `-->' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.xml
new file mode 100644
index 000000000..106df72eb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&e;-->
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.out
new file mode 100644
index 000000000..5d8c43d45
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/183.xml", at line 2, position 28:
+ERROR (Well-formedness constraint): `)*' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.xml
new file mode 100644
index 000000000..85ddfc82a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.out
new file mode 100644
index 000000000..3b62191f0
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/184.xml", at line 2, position 25:
+ERROR (Well-formedness constraint): Name expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.xml
new file mode 100644
index 000000000..f87539255
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.xml
@@ -0,0 +1,6 @@
+
+
+]>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.ent
new file mode 100644
index 000000000..fdd8077fa
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.ent
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.out
new file mode 100644
index 000000000..53d52c7e3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/185.xml", at line 3, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.xml
new file mode 100644
index 000000000..ea2f6d758
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.xml
@@ -0,0 +1,3 @@
+
+
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.out
new file mode 100644
index 000000000..6fa931ac7
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/186.xml", at line 5, position 15:
+ERROR (Well-formedness constraint): Whitespace is missing between attributes `b' and `d'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.xml
new file mode 100644
index 000000000..0bbaccaa3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.xml
@@ -0,0 +1,5 @@
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/null.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/null.ent
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.out
new file mode 100644
index 000000000..8b203dea6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/001.xml", at line 4, position 7:
+ERROR (Validity constraint): Found reference to external entity in attribute value
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.xml
new file mode 100644
index 000000000..56b51007e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.xml
@@ -0,0 +1,4 @@
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.out
new file mode 100644
index 000000000..ea6c1f94e
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.out
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/002.xml", at line 4, position 22:
+ERROR (Validity constraint): Found reference to external entity in attribute value
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.xml
new file mode 100644
index 000000000..f247879a6
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/null.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/null.ent
new file mode 100644
index 000000000..e69de29bb
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/run_negative b/helm/DEVEL/pxp/pxp/rtests/negative/run_negative
new file mode 100755
index 000000000..3c58a4ef8
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/run_negative
@@ -0,0 +1,117 @@
+#! /bin/bash
+
+# $Id$
+
+
+t=./test_negative
+
+init_test () {
+    # Record the present output of test_negative as the expected result,
+    # unless a result file already exists.
+    # $1: options for test_negative; $2: path to the test record (.xml)
+    local options="$1" input="$2" output
+    output="$(dirname "$input")/$(basename "$input" .xml).out"
+    if [ -f "$output" ]; then
+        echo "Test $input already initialized; skipping"
+    else
+        $t $options "$input" >"$output"   # $options unquoted on purpose: 0..n flags
+        echo "Test $input initialized."
+    fi
+}
+
+
+check_test () {
+    # Run test_negative on a test record and compare with the recorded result.
+    # $1: options for test_negative; $2: path to the test record (.xml)
+    local options="$1" input="$2" output
+    output="$(dirname "$input")/$(basename "$input" .xml).out"
+    # $options is deliberately unquoted: it may carry several flags or none.
+    $t $options "$input" >current.out
+    if [ -f "$output" ]; then
+        if cmp "$output" current.out; then
+            echo "Test $input OK"
+        else
+            echo "Test $input" 'FAILED!!!'
+        fi
+    else
+        echo "Test $input still uninitialized"
+        echo - OUTPUT:
+        cat current.out
+    fi
+}
+
+
+for_directory () {
+    # Apply a test action to every given file; a directory is expanded to
+    # the *.xml files it contains.
+    # $1: action; $2: options for test_negative; $3...: files/directories
+    local what="$1" options="$2" input ent
+    shift; shift
+    while [ $# -gt 0 ]; do
+        input="$1"
+        shift
+        if [ -f "$input" ]; then
+            "$what" "$options" "$input"
+        elif [ -d "$input" ]; then
+            # Quoted so that paths containing whitespace survive.
+            for ent in "$input"/*.xml; do
+                for_directory "$what" "$options" "$ent"
+            done
+        else
+            echo "Not found: $input" >&2
+        fi
+    done
+}
+
+
+usage () {  # Print the usage summary on stderr and terminate with status 1.
+    cat <<EOF >&2
+usage: $0 [ -init -wf ] file ... dir ...
+EOF
+    exit 1
+}
+
+
+# Defaults: compare against the recorded results, no extra parser flags.
+action=check_test; options=
+while :; do
+    case "$1" in
+        -init)                  # record results instead of comparing them
+            action=init_test
+            shift
+            ;;
+        -wf)                    # hand -wf through to test_negative
+            options="${options} -wf"
+            shift
+            ;;
+        -*)                     # any other option-like word is an error
+            usage
+            ;;
+        *)                      # first non-option word (or nothing left)
+            break
+            ;;
+    esac
+done
+
+
+if [ $# -eq 0 ]; then
+    # Default run: the not-well-formed suites with -wf, then the invalid ones.
+    for_directory "$action" -wf \
+        data_jclark_notwf/ext-sa data_jclark_notwf/not-sa data_jclark_notwf/sa \
+        data_notwf/sa
+    for_directory "$action" "" data_jclark_invalid data_invalid
+else
+    for_directory "$action" "$options" "$@"
+fi
+
+# ======================================================================
+# $Log$
+# Revision 1.1 2000/11/17 09:57:33 lpadovan
+# Initial revision
+#
+# Revision 1.2 2000/05/01 16:23:39 gerd
+# Added data_invalid.
+#
+# Revision 1.1 2000/05/01 15:58:50 gerd
+# Initial revision.
+#
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/test_negative.ml b/helm/DEVEL/pxp/pxp/rtests/negative/test_negative.ml
new file mode 100644
index 000000000..13f049c47
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/negative/test_negative.ml
@@ -0,0 +1,105 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+(* Prints [string_of_exn e] on stdout, followed by a newline. *)
+let print_error e = print_endline (string_of_exn e)
+;;
+
+(* Warning sink handed to the parser configuration: every warning is
+ * echoed on stdout with a "WARNING: " prefix. *)
+class warner =
+  object
+    method warn msg = print_endline (String.concat "" ["WARNING: "; msg])
+  end;;
+
+(* Parses [filename] and reports the outcome on stdout.
+ * [debug] turns on the parser's debugging mode, [wf] restricts the run to
+ * the well-formedness parser, and [iso88591] selects ISO-8859-1 instead of
+ * UTF-8 as internal encoding.  Any exception is printed with [print_error]
+ * and recorded in [error_happened]; nothing is re-raised.
+ *)
+let parse debug wf iso88591 filename =
+  try
+    let config =
+      { default_config with
+          warner = new warner;
+          debugging_mode = debug;
+          encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+          idref_pass = true;
+      }
+    in
+    let parse_fn =
+      if wf then parse_wfdocument_entity
+      else
+        let index = new hash_index in
+        parse_document_entity
+          ?transform_dtd:None ~id_index:(index :> 'ext index)
+    in
+    (* Only success or failure matters here; the tree itself is dropped. *)
+    ignore (parse_fn config (from_file filename) default_spec);
+    print_endline "Parsed without error"
+  with e ->
+    error_happened := true;
+    print_error e
+;;
+
+
+(* Entry point: reads the command line and runs [parse] on every listed
+ * file, in the order given.
+ *)
+let main() =
+  let debug = ref false
+  and wf = ref false
+  and iso88591 = ref false
+  and rev_files = ref [] in
+  let speclist =
+    [ "-d", Arg.Set debug, "turn debugging mode on";
+      "-wf", Arg.Set wf, "check only on well-formedness";
+      "-iso-8859-1", Arg.Set iso88591, "use ISO-8859-1 as internal encoding instead of UTF-8";
+    ]
+  in
+  let usage = "\nusage: test_negative [options] file ...\n\nList of options:" in
+  Arg.parse speclist (fun f -> rev_files := f :: !rev_files) usage;
+  List.iter (parse !debug !wf !iso88591) (List.rev !rev_files)
+;;
+
+
+let () = main () in   (* run first, then report failure via the exit status *)
+if !error_happened then exit 1;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:33 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/07/14 14:57:12 gerd
+ * Updated: warner
+ *
+ * Revision 1.5 2000/07/14 14:20:11 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.4 2000/07/09 01:49:09 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.3 2000/06/04 20:31:21 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/28 17:23:22 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/05/01 15:58:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/Makefile b/helm/DEVEL/pxp/pxp/rtests/reader/Makefile
new file mode 100644
index 000000000..b1f204f22
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/reader/Makefile
@@ -0,0 +1,31 @@
+# make test_reader: make bytecode executable
+# (no native-code executable is built in this directory)
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_reader: test_reader.ml minilex.mll  # minilex.ml is regenerated from minilex.mll
+	ocamllex minilex.mll
+	ocamlfind ocamlc -custom -o test_reader -package .,unix,threads \
+		-linkpkg -thread -noautolink \
+		-g minilex.ml test_reader.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:  # nothing to do
+
+.PHONY: clean
+clean:  # removes compiler output and the generated lexer
+	rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa minilex.ml
+
+.PHONY: CLEAN
+CLEAN: clean  # no subdirectories are visited here, so same as clean
+
+.PHONY: distclean
+distclean: clean  # additionally removes editor backups and the executable
+	rm -f *~
+	rm -f test_reader
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/minilex.mll b/helm/DEVEL/pxp/pxp/rtests/reader/minilex.mll
new file mode 100644
index 000000000..1c9fbecec
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/reader/minilex.mll
@@ -0,0 +1,7 @@
+{ }
+(* Minimal scanner for the reader tests: every call consumes one character
+ * of the lexbuf and returns it, or returns None at the end of the input. *)
+rule nextchar = parse
+    _   { Some (Lexing.lexeme_char lexbuf 0) }
+  | eof { None }
+{ }
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/t100.dat b/helm/DEVEL/pxp/pxp/rtests/reader/t100.dat
new file mode 100644
index 000000000..ad471007b
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/reader/t100.dat
@@ -0,0 +1 @@
+0123456789
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/test_reader.ml b/helm/DEVEL/pxp/pxp/rtests/reader/test_reader.ml
new file mode 100644
index 000000000..f01edf576
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/reader/test_reader.ml
@@ -0,0 +1,455 @@
+open Pxp_reader;;
+open Pxp_types;;
+open Minilex;;
+
+let make_channel s =
+  (* Yields an input channel from which the bytes of s can be read.
+   * A helper thread writes s into a pipe and then closes the write end;
+   * the read end of that pipe is what the caller gets.
+   *)
+  let (fd_in, fd_out) = Unix.pipe() in
+  let source = Unix.in_channel_of_descr fd_in in
+  let sink = Unix.out_channel_of_descr fd_out in
+  let feed () =
+    output_string sink s;
+    close_out sink
+  in
+  ignore (Thread.create feed ());
+  source
+;;
+
+(**********************************************************************)
+
+let t001 () =
+  (* Plain string input, no recoding: observes how far the lexbuf is
+   * filled before and after change_encoding.
+   *)
+  let text = "0123456789abc" in
+  let r = new resolve_read_this_string text in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let ch = nextchar lb in
+  assert (ch = Some '0');
+  (* lex_curr_pos at the very end of the filled part of lex_buffer means
+   * that the buffer is empty again after this one character.
+   *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Skip the characters '1' to '8'. *)
+  for _k = 1 to 8 do
+    ignore (nextchar lb)
+  done;
+  let ch = nextchar lb in
+  assert (ch = Some '9');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  (* From now on more than one character must be buffered. *)
+  let ch = nextchar lb in
+  assert (ch = Some 'a');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  (* Skip 'b'. *)
+  ignore (nextchar lb);
+  let ch = nextchar lb in
+  assert (ch = Some 'c');
+  (* The input is used up now. *)
+  let ch = nextchar lb in
+  assert (ch = None);
+  r # close_in;
+  true
+;;
+
+
+let t002 () =
+  (* Same scenario as t001, but the bytes come from a channel: observes how
+   * far the lexbuf is filled before and after change_encoding.
+   *)
+  let input = make_channel "0123456789abc" in
+  let r = new resolve_read_this_channel input in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let ch = nextchar lb in
+  assert (ch = Some '0');
+  (* lex_curr_pos at the very end of the filled part of lex_buffer means
+   * that the buffer is empty again after this one character.
+   *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Skip the characters '1' to '8'. *)
+  for _k = 1 to 8 do
+    ignore (nextchar lb)
+  done;
+  let ch = nextchar lb in
+  assert (ch = Some '9');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  (* From now on more than one character must be buffered. *)
+  let ch = nextchar lb in
+  assert (ch = Some 'a');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  (* Skip 'b'. *)
+  ignore (nextchar lb);
+  let ch = nextchar lb in
+  assert (ch = Some 'c');
+  (* The input is used up now. *)
+  let ch = nextchar lb in
+  assert (ch = None);
+  r # close_in;
+  true
+;;
+
+
+let t003 () =
+  (* Tests non-automatic encoding conversion from ISO-8859-1 to UTF-8.
+   * The input is spelled with decimal escapes so that its bytes do not
+   * depend on the encoding this source file happens to be stored in.
+   *)
+  let s = "0\171\187\176\225\224\226\227\228\193\192\194\195\196\233\232\234\235\237\236\238\239\205\204\206\207\243\242\244\245\248\246\211\210\212\213\216\214\250\249\251\252\253\255\221\223\231\161\191\241\209" in
+  let r = new resolve_read_this_string ~fixenc:`Enc_iso88591 s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  (* Note: because we initialize the resolver with ~fixenc, the resolver can
+   * fill the buffer with more than one byte from the beginning.
+   *)
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    (match !c with Some x -> u := !u ^ String.make 1 x | None -> ());
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+
+let t004 () =
+  (* Tests non-automatic encoding conversion from UTF-8 to ISO-8859-1.
+   * The expected result is spelled with decimal escapes so that its bytes
+   * do not depend on the encoding this source file happens to be stored in.
+   *)
+  let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
+  let r = new resolve_read_this_string ~fixenc:`Enc_utf8 s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  (* Note: because we initialize the resolver with ~fixenc, the resolver can
+   * fill the buffer with more than one byte from the beginning.
+   *)
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    (match !c with Some x -> u := !u ^ String.make 1 x | None -> ());
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\171\187\176\225\224\226\227\228\193\192\194\195\196\233\232\234\235\237\236\238\239\205\204\206\207\243\242\244\245\248\246\211\210\212\213\216\214\250\249\251\252\253\255\221\223\231\161\191\241\209"
+;;
+
+
+let t005 () =
+  (* Tests automatic encoding conversion from UTF-8 to ISO-8859-1.
+   * The expected result is spelled with decimal escapes so that its bytes
+   * do not depend on the encoding this source file happens to be stored in.
+   *)
+  let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    (match !c with Some x -> u := !u ^ String.make 1 x | None -> ());
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\171\187\176\225\224\226\227\228\193\192\194\195\196\233\232\234\235\237\236\238\239\205\204\206\207\243\242\244\245\248\246\211\210\212\213\216\214\250\249\251\252\253\255\221\223\231\161\191\241\209"
+;;
+
+
+let t006 () =
+  (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
+   * This variant invokes change_encoding early.
+   * The input is written with decimal escapes only, so its bytes are the
+   * same whatever encoding this source file is stored in.
+   *)
+  let s = "\254\255\0000\000\171\000\187\000\176\000\225\000\224\000\226\000\227\000\228\000\193\000\192\000\194\000\195\000\196\000\233\000\232\000\234\000\235\000\237\000\236\000\238\000\239\000\205\000\204\000\206\000\207\000\243\000\242\000\244\000\245\000\248\000\246\000\211\000\210\000\212\000\213\000\216\000\214\000\250\000\249\000\251\000\252\000\253\000\255\000\221\000\223\000\231\000\161\000\191\000\241\000\209" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  (* Collect the whole output, starting with the '0' read above. *)
+  let u = ref "" in
+  while !c <> None do
+    (match !c with Some x -> u := !u ^ String.make 1 x | None -> ());
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+
+let t007 () =
+  (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
+   * This variant does not invoke change_encoding
+   * The non-ASCII low bytes of the UTF-16 input are written as decimal
+   * escapes so that every code unit stays exactly two bytes, whatever
+   * encoding this source file is stored in.
+   *)
+  let s = "\254\255\0000\000\171\000\187\000\176\000\225\000\224\000\226\000\227\000\228\000\193\000\192\000\194\000\195\000\196\000\233\000\232\000\234\000\235\000\237\000\236\000\238\000\239\000\205\000\204\000\206\000\207\000\243\000\242\000\244\000\245\000\248\000\246\000\211\000\210\000\212\000\213\000\216\000\214\000\250\000\249\000\251\000\252\000\253\000\255\000\221\000\223\000\231\000\161\000\191\000\241\000\209" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  let u = Buffer.create 128 in
+  while !c <> None do
+    ( match !c with Some x -> Buffer.add_char u x | None -> () );
+    c := nextchar lb
+  done;
+  r # close_in;
+  Buffer.contents u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+(**********************************************************************)
+
+let t100 () =
+  (* Reads from a file without recoding it.  The file is addressed by a
+   * complete file://localhost URL built from the working directory. *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let url = "file://localhost" ^ Sys.getcwd() ^ "/t100.dat" in
+  let lb = r # open_in (System url) in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  (* lex_curr_pos must now sit at lex_buffer_len: everything put into the
+   * lexer buffer so far has been consumed.
+   *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  for k = 1 to 8 do
+    ignore (nextchar lb)
+  done;
+  let tenth = nextchar lb in
+  assert (tenth = Some '9');
+  r # close_in;
+  true
+;;
+
+let t101 () =
+  (* Reads from a file without recoding it.  The file is addressed by a
+   * URL without scheme, of the form //localhost followed by the path. *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let url = "//localhost" ^ Sys.getcwd() ^ "/t100.dat" in
+  let lb = r # open_in (System url) in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  (* lex_curr_pos must now sit at lex_buffer_len: everything put into the
+   * lexer buffer so far has been consumed.
+   *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  for k = 1 to 8 do
+    ignore (nextchar lb)
+  done;
+  let tenth = nextchar lb in
+  assert (tenth = Some '9');
+  r # close_in;
+  true
+;;
+
+let t102 () =
+  (* Reads from a file without recoding it.  The file is addressed by its
+   * absolute path only (working directory plus file name). *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let path = Sys.getcwd() ^ "/t100.dat" in
+  let lb = r # open_in (System path) in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  (* lex_curr_pos must now sit at lex_buffer_len: everything put into the
+   * lexer buffer so far has been consumed.
+   *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  for k = 1 to 8 do
+    ignore (nextchar lb)
+  done;
+  let tenth = nextchar lb in
+  assert (tenth = Some '9');
+  r # close_in;
+  true
+;;
+
+let t103 () =
+  (* Reads from a file without recoding it.  The file is addressed by a
+   * relative System ID, the bare file name. *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in (System "t100.dat") in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  (* lex_curr_pos must now sit at lex_buffer_len: everything put into the
+   * lexer buffer so far has been consumed.
+   *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  for k = 1 to 8 do
+    ignore (nextchar lb)
+  done;
+  let tenth = nextchar lb in
+  assert (tenth = Some '9');
+  r # close_in;
+  true
+;;
+
+(**********************************************************************)
+
+let t110 () =
+  (* Checks whether relative URLs are properly handled: a clone of the
+   * resolver opens the same relative System ID while the first reader is
+   * still open, and the first reader must be usable afterwards.
+   *)
+  let skip8 buf =
+    for k = 1 to 8 do ignore (nextchar buf) done
+  in
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in (System "t100.dat") in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  skip8 lb;
+  (* Second reader, obtained from a clone of the resolver *)
+  let r' = r # clone in
+  let lb' = r' # open_in (System "t100.dat") in
+  let first' = nextchar lb' in
+  assert (first' = Some '0');
+  skip8 lb';
+  let tenth' = nextchar lb' in
+  assert (tenth' = Some '9');
+  r' # close_in;
+  (* The original reader continues where it stopped *)
+  let tenth = nextchar lb in
+  assert (tenth = Some '9');
+  r # close_in;
+  true
+;;
+
+(**********************************************************************)
+(* Tests whether the encoding handling of System IDs is okay *)
+
+let t200 () =
+  (* Check the technique for the following tests: a.xml declares the external
+   * entity ae with System ID b.xml.  (Checks also [combine] to some extent.)
+   *)
+  let r1 = new resolve_read_this_string
+      ~id:(System "b.xml")
+      ~fixenc:`Enc_iso88591
+      "ae" in
+  let r2 = new resolve_read_this_string
+      ~id:(System "a.xml")
+      ~fixenc:`Enc_iso88591
+      "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'b.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+
+let t201 () =
+  (* Check that System IDs are converted to UTF-8. rep_encoding = ISO-8859-1 *)
+  let r1 = new resolve_read_this_string
+      ~id:(System "\195\164.xml") (* UTF-8 bytes of a-umlaut (U+00E4) *)
+      ~fixenc:`Enc_iso88591
+      "ae" in
+  let r2 = new resolve_read_this_string
+      ~id:(System "a.xml")
+      ~fixenc:`Enc_iso88591
+      "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM '\228.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+
+let t202 () =
+  (* Check that System IDs are converted to UTF-8. rep_encoding = UTF-8 *)
+  let r1 = new resolve_read_this_string
+      ~id:(System "\195\164.xml") (* UTF-8 bytes of a-umlaut (U+00E4) *)
+      ~fixenc:`Enc_iso88591
+      "ae" in
+  let r2 = new resolve_read_this_string
+      ~id:(System "a.xml")
+      ~fixenc:`Enc_iso88591
+      "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM '\228.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_utf8 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+(**********************************************************************)
+
+let test f n =
+  (* Runs the test function [f] labelled [n] and reports the outcome on
+   * stdout.  An exception escaping from [f] is reported as a failure and
+   * not propagated, so the remaining tests still run.
+   *)
+  try
+    print_string ("Reader test " ^ n);
+    flush stdout;
+    print_endline (if f() then " ok" else " FAILED!!!!")
+  with
+    error -> print_endline (" FAILED: " ^ string_of_exn error)
+;;
+
+(* Run every reader test, in the original order. *)
+let () =
+  List.iter
+    (fun (f, n) -> test f n)
+    [ t001, "001";
+      t002, "002";
+      t003, "003";
+      t004, "004";
+      t005, "005";
+      t006, "006";
+      t007, "007";
+      t100, "100";
+      t101, "101";
+      t102, "102";
+      t103, "103";
+      t110, "110";
+      t200, "200"; t201, "201"; t202, "202" ]
+;;
diff --git a/helm/DEVEL/pxp/pxp/rtests/run b/helm/DEVEL/pxp/pxp/rtests/run
new file mode 100755
index 000000000..11b573c1c
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/run
@@ -0,0 +1,9 @@
+#! /bin/sh
+# Runs all regression test suites; aborts on the first failing one.
+set -e
+
+for suite in reader/test_reader canonxml/run_canonxml write/run_write \
+             codewriter/run_codewriter negative/run_negative
+do
+  (cd "${suite%/*}" && "./${suite##*/}")
+done
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/Makefile b/helm/DEVEL/pxp/pxp/rtests/write/Makefile
new file mode 100644
index 000000000..634b27257
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/write/Makefile
@@ -0,0 +1,28 @@
+# make validate: make bytecode executable
+# make validate.opt: make native executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_write: test_write.ml
+ ocamlfind ocamlc -g -custom -o test_write -package .,str -linkpkg test_write.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out1 out2 out3
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f test_write
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/run_write b/helm/DEVEL/pxp/pxp/rtests/write/run_write
new file mode 100755
index 000000000..1c43acb2f
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/write/run_write
@@ -0,0 +1,17 @@
+#! /bin/bash
+
+test_sample () {
+ file="$1"
+ echo -n "Testing $file... "
+ ./test_write -in "$file" >out1
+ ./test_write -in out1 >out2
+ ./test_write -in out2 >out3
+ if cmp out1 out3; then
+ echo "OK"
+ else
+ echo "FAILED"
+ fi
+}
+
+
+test_sample "sample001.xml"
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/sample001.xml b/helm/DEVEL/pxp/pxp/rtests/write/sample001.xml
new file mode 100644
index 000000000..4973505fb
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/write/sample001.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+]>
+
+
+
+
+ This is text!
+
+
+
+
+
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/test_write.ml b/helm/DEVEL/pxp/pxp/rtests/write/test_write.ml
new file mode 100644
index 000000000..48defd2a1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/rtests/write/test_write.ml
@@ -0,0 +1,94 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;; (* set by parse_and_write when an exception was caught *)
+
+(* Reports the exception [e] on stderr, rendered by string_of_exn. *)
+let prerr_error e = prerr_endline (string_of_exn e)
+;;
+
+(* Warning sink: forwards every warning message to stderr. *)
+class warner =
+  object
+    method warn w = prerr_endline ("WARNING: " ^ w)
+  end
+;;
+
+let parse_and_write in_filename =
+  (* Parses the XML file [in_filename] and writes the resulting document
+   * tree to stdout as UTF-8.  Any exception is reported on stderr and
+   * recorded in [error_happened] instead of being propagated.
+   *)
+  let exemplar = new element_impl default_extension in
+  let spec =
+    make_spec_from_mapping
+      ~super_root_exemplar: exemplar
+      ~default_pinstr_exemplar: exemplar
+      ~data_exemplar: (new data_impl default_extension)
+      ~default_element_exemplar: exemplar
+      ~element_mapping: (Hashtbl.create 1)
+      ()
+  in
+  (* Default configuration with four fields overridden *)
+  let config =
+    { default_config with
+        warner = new warner;
+        enable_pinstr_nodes = true;
+        enable_super_root_node = true;
+        encoding = `Enc_utf8;
+    }
+  in
+  try
+    let source = from_file in_filename in
+    let tree = parse_document_entity config source spec in
+    tree # write (Out_channel stdout) `Enc_utf8
+  with
+    e ->
+      error_happened := true;
+      prerr_error e
+;;
+
+
+let main() =
+  (* Command line handling: the -in option names the document to process;
+   * anonymous arguments are rejected and a missing -in is fatal. *)
+  let in_file = ref "" in
+  let options =
+    [ "-in", Arg.String (fun s -> in_file := s), " Set the XML file to read" ]
+  in
+  let reject _ = raise (Arg.Bad "Unexpected argument") in
+  Arg.parse options reject "
+usage: test_write [ options ]
+
+List of options:";
+  match !in_file with
+    "" -> prerr_endline "No input file specified.";
+          exit 1
+  | name -> parse_and_write name
+;;
+
+
+(* Entry point: a recorded parse/write error becomes exit status 1. *)
+let () = main(); if !error_happened then exit 1;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:35 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/16 23:44:21 gerd
+ * Updates because of changes of the PXP API.
+ *
+ * Revision 1.1 2000/07/16 17:50:39 gerd
+ * Initial revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/tools/collect_files b/helm/DEVEL/pxp/pxp/tools/collect_files
new file mode 100755
index 000000000..d1770eb4a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/collect_files
@@ -0,0 +1,25 @@
+#! /bin/sh
+#
+# $Id$
+# ----------------------------------------------------------------------
+#
+# usage: collect_files file ...
+#
+# Prints the names of the files passed as arguments which actually
+# exist and are regular files.
+
+for x in "$@"; do
+ if [ -f "$x" ]; then
+ echo "$x"
+ fi
+done
+
+# ======================================================================
+#
+# $Log$
+# Revision 1.1 2000/11/17 09:57:35 lpadovan
+# Initial revision
+#
+# Revision 1.1 2000/07/27 21:07:26 gerd
+# Initial revision.
+#
diff --git a/helm/DEVEL/pxp/pxp/tools/insert_variant b/helm/DEVEL/pxp/pxp/tools/insert_variant
new file mode 100755
index 000000000..cb592bb86
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/insert_variant
@@ -0,0 +1,105 @@
+#! /bin/sh
+# (*
+exec ocaml "$0" "$@"
+*) directory ".";;
+
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let get_arg variant insert_line =
+  (* Returns the argument of an "#insert" line: everything after the first
+   * 8 characters, with blanks dropped and each asterisk replaced by
+   * [variant].  A Buffer avoids rebuilding the result for every character.
+   *)
+  let arg = Buffer.create (String.length insert_line) in
+  for i = 8 to String.length insert_line - 1 do
+    match insert_line.[i] with
+      ' ' -> ()
+    | '*' -> Buffer.add_string arg variant
+    | c   -> Buffer.add_char arg c
+  done;
+  Buffer.contents arg
+;;
+
+
+let edit_file variant name =
+  (* Copies NAME (x.src) to x_VARIANT.mll, expanding each "#insert " line *)
+  let basename = Filename.chop_suffix name ".src" in
+  let mllname = basename ^ "_" ^ variant ^ ".mll" in
+  let chin = open_in name in
+  let chout = open_out mllname in
+  output_string chout "(* File generated by insert_variant; DO NOT EDIT! *)\n";
+  begin try
+    while true do
+      let line = input_line chin in
+      (* We do not have Str here. *)
+      if String.length line >= 8 && String.sub line 0 8 = "#insert " then begin
+        let insname = get_arg variant line in
+        (* Copy the file insname to chout *)
+        let chcopy = open_in insname in
+        let n = in_channel_length chcopy in
+        let s = String.create n in
+        really_input chcopy s 0 n;
+        close_in chcopy;
+        output_string chout s;
+      end else begin
+        output_string chout line;
+        output_char chout '\n';
+      end
+    done
+  with
+    End_of_file -> ()
+  end;
+  close_in chin;
+  close_out chout
+;;
+
+
+let main() =
+  (* Reads -variant and the .src file names from the command line, then
+   * generates the variant-specific .mll file for each of them in turn. *)
+  let variant = ref "" in
+  let files = ref [] in
+  let options =
+    [ "-variant", Arg.String (fun s -> variant := s),
+        " Set the variant (character encoding)";
+    ]
+  in
+  let usage = "insert_variant [ options ] file.src ...
+
+Reads the files, replaces the #insert lines by the referred files, and
+writes the file file_variant.mll.
+
+The #insert lines include the specified file into the source. The
+asterisk (*) is replaced by the name of the variant.
+
+Options:
+" in
+  Arg.current := 0; (* Because of a OCaml-3.00 bug *)
+  (* Anonymous arguments are the files, kept in command-line order *)
+  Arg.parse options (fun s -> files := !files @ [s]) usage;
+  if !variant = "" then
+    failwith "No variant specified!";
+  List.iter (fun name -> edit_file !variant name) !files
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:35 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/05/20 21:14:33 gerd
+ * Workaround for an OCaml 3.00 bug.
+ *
+ * Revision 1.1 2000/05/20 20:30:15 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/.cvsignore b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/.cvsignore
new file mode 100644
index 000000000..deb5b7fba
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/.cvsignore
@@ -0,0 +1,4 @@
+*.cmo
+*.cmx
+*.cmi
+
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/Makefile b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/Makefile
new file mode 100644
index 000000000..504cfe540
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/Makefile
@@ -0,0 +1,50 @@
+#(******************************************************)
+#(* Claudio Sacerdoti Coen *)
+#(* 14/05/2000 *)
+#(******************************************************)
+
+OCAMLC = ocamlc
+OCAMLOPT = ocamlopt
+OCAMLDEP = ocamldep
+OCAMLLEX = ocamllex
+OCAMLYACC = ocamlyacc
+
+all: ucs2_to_utf8
+opt: ucs2_to_utf8.opt
+
+DEPOBJS = ucs2_to_utf8.ml lexer.ml parser.ml parser.mli types.ml
+
+UCS2_TO_UTF8OBJS = types.cmo lexer.cmo parser.cmo ucs2_to_utf8.cmo
+UCS2_TO_UTF8OPTOBJS = types.cmx lexer.cmx parser.cmx ucs2_to_utf8.cmx
+
+lexer.ml:
+ $(OCAMLLEX) lexer.mll
+
+parser.ml:
+ $(OCAMLYACC) parser.mly
+
+parser.mli:
+ $(OCAMLYACC) parser.mly
+
+depend: lexer.ml parser.ml parser.mli
+ $(OCAMLDEP) $(DEPOBJS) > depend
+
+ucs2_to_utf8: $(UCS2_TO_UTF8OBJS)
+ $(OCAMLC) -o ucs2_to_utf8 $(UCS2_TO_UTF8OBJS)
+
+ucs2_to_utf8.opt: $(UCS2_TO_UTF8OPTOBJS)
+ $(OCAMLOPT) -o ucs2_to_utf8.opt $(UCS2_TO_UTF8OPTOBJS)
+
+.SUFFIXES: .ml .mli .cmo .cmi .cmx
+.ml.cmo:
+ $(OCAMLC) -c $<
+.mli.cmi:
+ $(OCAMLC) -c $<
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+clean:
+ rm -f *.cm[iox] *.o lexer.ml parser.ml parser.mli \
+ ucs2_to_utf8 ucs2_to_utf8.opt
+
+include depend
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/README b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/README
new file mode 100644
index 000000000..d02ae8cf5
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/README
@@ -0,0 +1,15 @@
+(******************************************************)
+(* Claudio Sacerdoti Coen *)
+(* 14/05/2000 *)
+(******************************************************)
+
+How to compile: "make clean && make depend && make && make opt"
+
+Usage: "cat input.mll | ./ucs2_to_utf8 > output.mll"
+ where in input.mll there are definitions of ucs2 regular expressions
+ and in output.mll there are the same utf8 regular expressions in the
+ format expected by ocamllex
+
+ See input/input.mll for an example (the definitions are taken from the
+ appendix B of the XML recommendation) and input/example.mll for a
+ smaller one.
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/lexer.mll b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/lexer.mll
new file mode 100644
index 000000000..dfbeb5ad1
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/lexer.mll
@@ -0,0 +1,43 @@
+{
+(******************************************************)
+(* Claudio Sacerdoti Coen *)
+(* 14/05/2000 *)
+(******************************************************)
+
+open Parser
+
+let comment_depth = ref 0;; (* nesting level of open comments; maintained by the lexer rules *)
+
+(* Turns a lexeme #xHHHH into the integer HHHH, without mutating [l] *)
+let charint_of_lexeme l =
+  int_of_string ("0" ^ String.sub l 1 (String.length l - 1))
+;;
+}
+(* One hexadecimal digit; only the upper-case letters A-F are accepted *)
+let digit = ['0'-'9']|['A'-'F']
+(* token: skips blanks and comments and returns the next parser token *)
+rule token =
+ parse
+ [' ' '\t' '\n'] { token lexbuf }
+ | "let" { LET }
+ | (['a'-'z']|'_')(['a'-'z']|['A'-'Z']|'_'|['0'-'9']|'\'')*
+ { IDENT (Lexing.lexeme lexbuf) }
+ | '=' { EQ }
+ | ";;" { END_OF_LET }
+ | "|" { PIPE }
+ | '[' { LBRACKET }
+ | ']' { RBRACKET }
+ | '-' { RANGE }
+ | "(*" { incr comment_depth ;
+ comment lexbuf
+ }
+ | "#x" digit digit digit digit { CHAR (charint_of_lexeme (Lexing.lexeme lexbuf)) }
+ | eof { EOF }
+(* comment: consumes a possibly nested comment, then resumes with token *)
+and comment =
+ parse
+ "(*" { incr comment_depth ; comment lexbuf }
+ | "*)" { decr comment_depth ;
+ if !comment_depth = 0 then token lexbuf else comment lexbuf
+ }
+ | _ { comment lexbuf }
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/parser.mly b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/parser.mly
new file mode 100644
index 000000000..2fba77593
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/parser.mly
@@ -0,0 +1,40 @@
+/******************************************************/
+/* Claudio Sacerdoti Coen */
+/* 14/05/2000 */
+/******************************************************/
+
+/* CHAR carries a character code (int), IDENT the name of a definition. */
+%token <int> CHAR
+%token <string> IDENT
+%token LET
+%token EQ
+%token END_OF_LET
+%token LBRACKET RBRACKET
+%token PIPE
+%token RANGE
+%token EOF
+%start main
+%type <Types.definition list> main
+
+%%
+/* main: the whole input, a list of definitions in source order */
+main:
+ EOF { [] }
+ | declaration main { $1::$2 }
+;
+/* declaration: let IDENT = regexp ;; becomes a Types.definition record */
+declaration:
+ LET IDENT EQ regexp END_OF_LET
+ { { Types.id = $2 ; Types.rel = $4 } }
+;
+/* regexp: one or more alternatives separated by PIPE, as a list */
+regexp:
+ regexptoken PIPE regexp { $1::$3 }
+ | regexptoken { [$1] }
+;
+/* regexptoken: a single code, a bracketed range of codes, or a name */
+regexptoken:
+ CHAR { Types.Char $1 }
+ | LBRACKET CHAR RANGE CHAR RBRACKET { Types.Interval ($2,$4) }
+ | IDENT { Types.Identifier $1 }
+;
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/types.ml b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/types.ml
new file mode 100644
index 000000000..e2da7dc7a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/types.ml
@@ -0,0 +1,13 @@
+(******************************************************)
+(* Claudio Sacerdoti Coen *)
+(* 14/05/2000 *)
+(******************************************************)
+
+type regexp =
+ Char of int (* a single character code *)
+ | Interval of int * int (* lower bound, upper bound *)
+ | Identifier of string (* a name, printed unchanged in the output *)
+ | Concat of regexp list list (* concatenation of disjunctions *)
+;;
+(* One definition: its name and its body, a disjunction of alternatives *)
+type definition = { id : string ; rel : regexp list } ;;
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/ucs2_to_utf8.ml b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/ucs2_to_utf8.ml
new file mode 100644
index 000000000..1512d2300
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/ucs2_to_utf8.ml
@@ -0,0 +1,215 @@
+(******************************************************)
+(* Claudio Sacerdoti Coen *)
+(* 14/05/2000 *)
+(******************************************************)
+
+(* Surrogate Pairs are not accepted in XML files (is it true???) *)
+exception SurrogatePairs;;
+
+(* Interval (n,m) where n > m *)
+exception InvalidInterval of int * int;;
+
+(* Encodes one UCS-2 character code as UTF-8: a plain Char for 7-bit codes, *)
+(* otherwise a Concat holding one singleton disjunction per encoded byte.   *)
+let char_ucs2_to_utf8 n =
+  (* [byte lead mask x]: the bits of [x] selected by [mask], tagged [lead] *)
+  let byte lead mask x = [Types.Char (x land mask lor lead)] in
+  let continuation x = byte 0b10000000 0b00111111 x in
+  if n >= 0xD800 && n <= 0xDFFF then raise SurrogatePairs
+  else if n <= 0x007F then Types.Char n
+  else if n <= 0x07FF then
+    Types.Concat [byte 0b11000000 0b00011111 (n lsr 6); continuation n]
+  else
+    Types.Concat
+      [byte 0b11100000 0b00001111 (n lsr 12);
+       continuation (n lsr 6);
+       continuation n]
+;;
+
+(*CSC: Two functions for debugging purposes only
+
+let char_ucs2_to_utf8 =
+ function
+ n when n >= 0xD800 && n <= 0xDFFF -> assert false
+ | n when n <= 0x007F -> [[n]]
+ | n when n <= 0x07FF ->
+ [[(n lsr 6 land 0b00011111 lor 0b11000000)] ;
+ [(n land 0b00111111 lor 0b10000000)]]
+ | n ->
+ [[(n lsr 12 land 0b00001111 lor 0b11100000)] ;
+ [(n lsr 6 land 0b00111111 lor 0b10000000)] ;
+ [(n land 0b00111111 lor 0b10000000)]]
+;;
+
+let rec bprint =
+ function
+ 0 -> ""
+ | n -> bprint (n / 2) ^ string_of_int (n mod 2)
+;;
+*)
+
+(* A few useful functions *)
+(* [mklist e n]: the list holding [n] copies of [e]; expects n >= 0 *)
+let rec mklist e n =
+  if n = 0 then []
+  else e :: mklist e (n - 1)
+;;
+
+(* [sup n]: the highest value of [n] trailing bytes, each being 0b10111111 *)
+let sup n =
+  let top = Types.Char 0b10111111 in
+  if n = 1 then top
+  else Types.Concat (mklist [top] n)
+;;
+
+(* [inf n]: the lowest value of [n] trailing bytes, each being         *)
+(* 0b10000000, as a list of singleton disjunctions; unlike [sup] the   *)
+(* result is not wrapped in a Concat.                                  *)
+let inf n =
+  mklist [Types.Char 0b10000000] n
+;;
+
+(* Byte value following the one held by a singleton Char disjunction *)
+let mysucc = function
+  | [Types.Char n] -> succ n
+  | _ -> assert false
+;;
+
+(* Byte value preceding the one held by a singleton Char disjunction *)
+let mypred = function
+  | [Types.Char n] -> pred n
+  | _ -> assert false
+;;
+
+(* Given the utf8-encoded extremes of an interval of character codes,   *)
+(* both of the same length (a Char, or a Concat of 2 or 3 bytes), it    *)
+(* returns the disjunction of utf8 regexps matching the whole interval  *)
+let rec same_length_ucs2_to_utf8 =
+ let module T = Types in
+ function
+ (T.Char n, T.Char m) when n = m -> [T.Char n]
+ | (T.Char n, T.Char m) -> [T.Interval (n,m)]
+ | (T.Concat [hen ; [tln]], T.Concat [hem ; [tlm]]) when hen = hem -> (* 2 bytes, same first byte *)
+ [T.Concat [hen ; same_length_ucs2_to_utf8 (tln,tlm)]]
+ | (T.Concat [hen ; [tln]], T.Concat ([hem ; [tlm]] as e2)) -> (* 2 bytes, first bytes differ *)
+ (T.Concat [hen ; same_length_ucs2_to_utf8 (tln,sup 1)]) :: (* from tln up to the top under hen *)
+ (let shen = mysucc hen
+ and phem = mypred hem in
+ let succhen = [T.Char shen] in
+ if succhen = hem then (* no first byte strictly between hen and hem *)
+ same_length_ucs2_to_utf8 (T.Concat (succhen::(inf 1)), T.Concat e2)
+ else
+ (T.Concat [[T.Interval (shen, phem)] ; (* all first bytes strictly in between *)
+ [T.Interval (0b10000000,0b10111111)]])::
+ same_length_ucs2_to_utf8 (T.Concat (hem::(inf 1)), T.Concat e2)
+ )
+ (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf 1)), T.Concat e2)*)
+ | (T.Concat (hen::tln), T.Concat (hem::tlm)) when hen = hem -> (* longer, same first byte *)
+ [T.Concat [hen ; same_length_ucs2_to_utf8 (T.Concat tln, T.Concat tlm)]]
+ | (T.Concat (hen::tln), T.Concat ((hem::tlm) as e2)) -> (* longer, first bytes differ *)
+ let n = List.length tln in
+ (T.Concat
+ [hen ; same_length_ucs2_to_utf8 (T.Concat tln,sup n)]) ::
+ (let shen = mysucc hen
+ and phem = mypred hem in
+ let succhen = [T.Char shen] in
+ if succhen = hem then
+ same_length_ucs2_to_utf8 (T.Concat (succhen::(inf n)), T.Concat e2)
+ else
+ (T.Concat [[T.Interval (shen, phem)] ; (* two trailing ranges are hard-coded here *)
+ [T.Interval (0b10000000,0b10111111)] ;
+ [T.Interval (0b10000000,0b10111111)]]
+ )::
+ same_length_ucs2_to_utf8 (T.Concat (hem::(inf n)), T.Concat e2)
+ )
+ (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf n)),T.Concat e2)*)
+ | _ -> assert false (* any other pair of shapes is not expected here *)
+;;
+
+(* Translates the UCS-2 interval (n,m) into a disjunction of UTF-8 regular *)
+(* expressions.  The interval is first cut at 0x7F/0x80 and 0x7FF/0x800 so *)
+(* that both ends of each piece encode to the same number of bytes; each   *)
+(* piece is then handed to same_length_ucs2_to_utf8.                       *)
+let rec seq_ucs2_to_utf8 (n,m) =
+  let surrogate x = x >= 0xD800 && x <= 0xDFFF in
+  if surrogate n || surrogate m then raise SurrogatePairs
+  else if n > m then raise (InvalidInterval (n,m))
+  else if n = m then [char_ucs2_to_utf8 n]
+  else if n <= 0x07F && m > 0x07F then
+    (seq_ucs2_to_utf8 (n,0x07F)) @ (seq_ucs2_to_utf8 (0x080,m))
+  else if n <= 0x07FF && m > 0x07FF then
+    (seq_ucs2_to_utf8 (n,0x07FF)) @ (seq_ucs2_to_utf8 (0x0800,m))
+  else
+    (* both extremes now have encodings of the same length *)
+    let utf8n = char_ucs2_to_utf8 n
+    and utf8m = char_ucs2_to_utf8 m in
+    same_length_ucs2_to_utf8 (utf8n,utf8m)
+;;
+
+(* Given a UCS-2 definition, returns the same definition with every *)
+(* regular expression rewritten as a UTF-8 regular expression       *)
+let ucs2_to_utf8 { Types.id = id ; Types.rel = rel } =
+  (* [convert re acc] puts the translation of [re] in front of [acc] *)
+  let rec convert re acc =
+    match re with
+      Types.Char i -> char_ucs2_to_utf8 i :: acc
+    | Types.Interval (l,u) -> seq_ucs2_to_utf8 (l,u) @ acc
+    | Types.Identifier _ -> re :: acc
+    | Types.Concat rell ->
+        Types.Concat (List.map convert_all rell) :: acc
+  and convert_all res = List.fold_right convert res [] in
+  { Types.id = id ; Types.rel = convert_all rel }
+;;
+
+(* Single sink for all generated text: standard output *)
+let output text = print_string text ;;
+
+(* padded_string_of_int i returns the decimal representation of i,   *)
+(* left-padded with zeros to 3 digits for 0 <= i < 256 (13 -> "013") *)
+let padded_string_of_int i =
+  let digits = string_of_int i in
+  let pad =
+    if i < 10 then "00"
+    else if i < 100 then "0" else ""
+  in
+  pad ^ digits
+;;
+
+(* Printers for a definition body.  print_disjunction writes the       *)
+(* alternatives of a disjunction separated by " | "; print_re writes   *)
+(* one regular expression.                                             *)
+let rec print_disjunction ?(first = true) res =
+  match res with
+    [] -> ()
+  | re :: rest ->
+      if not first then output " | " ;
+      print_re re ;
+      print_disjunction ~first:false rest
+and print_re re =
+  let quoted i = "'\\" ^ padded_string_of_int i ^ "'" in
+  match re with
+    Types.Char i -> output (quoted i)
+  | Types.Interval (l,u) -> output ("[" ^ quoted l ^ "-" ^ quoted u ^ "]")
+  | Types.Identifier name -> output name
+  | Types.Concat factors ->
+      (* only a factor with several alternatives is parenthesized *)
+      let print_factor = function
+          [] | [_] as alts -> print_disjunction alts
+        | alts -> output "(" ; print_disjunction alts ; output ")"
+      in
+      List.iter print_factor factors
+;;
+
+(* print_definition prints a definition in the format expected by ocamllex *)
+let print_definition def =
+  output ("let " ^ def.Types.id ^ " =\n ") ;
+  print_disjunction def.Types.rel ;
+  output "\n\n"
+;;
+
+(* main: parses the definitions on stdin, converts all, then prints them *)
+let () =
+  let definitions = Parser.main Lexer.token (Lexing.from_channel stdin) in
+  let converted = List.map ucs2_to_utf8 definitions in
+  List.iter print_definition converted
+;;